library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'tidyr' was built under R version 4.3.3
## Warning: package 'readr' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
## Warning: package 'readxl' was built under R version 4.3.3
library(haven)
library(mice)
##
## Attaching package: 'mice'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## cbind, rbind
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(ggpubr)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.3.3
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library(flextable)
## Warning: package 'flextable' was built under R version 4.3.3
##
## Attaching package: 'flextable'
##
## The following objects are masked from 'package:ggpubr':
##
## border, font, rotate
##
## The following object is masked from 'package:purrr':
##
## compose
library(sjPlot)
## Warning: package 'sjPlot' was built under R version 4.3.3
library(sjmisc)
## Warning: package 'sjmisc' was built under R version 4.3.3
## Learn more about sjmisc with 'browseVignettes("sjmisc")'.
##
## Attaching package: 'sjmisc'
##
## The following object is masked from 'package:purrr':
##
## is_empty
##
## The following object is masked from 'package:tidyr':
##
## replace_na
##
## The following object is masked from 'package:tibble':
##
## add_case
library(sjlabelled)
## Warning: package 'sjlabelled' was built under R version 4.3.3
##
## Attaching package: 'sjlabelled'
##
## The following objects are masked from 'package:haven':
##
## as_factor, read_sas, read_spss, read_stata, write_sas, zap_labels
##
## The following object is masked from 'package:forcats':
##
## as_factor
##
## The following object is masked from 'package:dplyr':
##
## as_label
##
## The following object is masked from 'package:ggplot2':
##
## as_label
municipios <- read_xlsx("municipios.xlsx")
propiedad <- read_xlsx("propiedad.xlsx")
To grasp a better understanding of the database a descriptive analysis of some of the variables is performed.
The Housing Price Index measures the change in housing prices, starting from a base of 100 set in 2015. The average of the General HPI, which reflects the price of all homes on the market, is 125, indicating a 25% price increase since 2015.
The boxplots show that the General HPI has several outliers with a much higher value than the average. We also see that this is repeated in the cases of the HPI for Second-hand housing and New housing. The General HPI has a value much closer to the Second-hand HPI, which indicates that there are more sales of second-hand homes than new construction homes in the market.
The Autonomous Communities where housing prices have increased the most are Madrid, Balearic Islands, Barcelona, Ceuta, and Melilla.
# IPV
mean(municipios$ipv2021General)
## [1] 126.4497
mean(municipios$ipv2011General)
## [1] 126.4662
ggplot(municipios, aes(x = 1, y = ipv2021General, fill = "ipv2021General")) +
geom_boxplot(position = position_dodge(width = 0.8), fill = "Orange") +
geom_boxplot(aes(x = 2, y = ipv2021Nueva, fill = "ipv2021Nueva"), position = position_dodge(width = 0.8), fill = "Blue") +
geom_boxplot(aes(x = 3, y = ipv2021Segunda, fill = "ipv2021Segunda"), position = position_dodge(width = 0.8), fill = "Green") +
scale_x_continuous(breaks = c(1, 2, 3), labels = c("ipv2021General", "ipv2021Nueva", "ipv2021Segunda")) +
labs(y = "Índice de Precio de Viviendas",
title = "Distribución del Índice de Precio de Viviendas por Tipo en 2021") +
theme_minimal()
# top index value 2021
municipios |>
distinct(CCAA, .keep_all = T) |>
arrange(desc(ipv2021General)) |>
slice(1:5)
## # A tibble: 5 × 55
## CPRO CMUN cusec MUN_LITERAL PROV_LITERAL CCAA ipv2021General ipv2021Nueva
## <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 28 991 28991 <=2000 Madrid Comu… 147. 152.
## 2 07 993 07993 5001 <= 10000 Balears, Il… Isla… 144. 153.
## 3 51 001 51001 Ceuta Ceuta Ceuta 142. 142.
## 4 08 994 08994 10001 <= 200… Barcelona Cata… 142. 153.
## 5 52 001 52001 Melilla Melilla Meli… 140. 140.
## # ℹ 47 more variables: ipv2021Segunda <dbl>, ipv2011General <dbl>,
## # ipv2011Nueva <dbl>, ipv2011Segunda <dbl>, ipvGeneralDif <dbl>,
## # RentaMedia2021 <dbl>, PorcentajeJoven2011 <dbl>, PorcentajeJoven2021 <dbl>,
## # PorcentajeJovenDif <dbl>, HABITAT <chr>, PorcentajeVacias2011 <dbl>,
## # PorcentajeVacias2021 <dbl>, VaciasDif <dbl>,
## # PorcentajeSecundarias2011 <dbl>, PorcentajeSecundarias2021 <dbl>,
## # SecundariasDif <dbl>, PorcentajeTurísticas2021 <dbl>, Compra11Mun <dbl>, …
Average disposable/net personal income. The average value for municipalities is €21,236 per year. However, there is a lot of variation between municipalities. The municipality with the highest income is Pozuelo de Alarcón with a value of €57,977, while the lowest income is Tudela in Navarra with €13,443.
mean(municipios$RentaMedia2021)
## [1] 21236.49
municipios |>
arrange(desc(RentaMedia2021)) |>
slice(1:3) |>
select(PROV_LITERAL, MUN_LITERAL, RentaMedia2021)
## # A tibble: 3 × 3
## PROV_LITERAL MUN_LITERAL RentaMedia2021
## <chr> <chr> <dbl>
## 1 Madrid Pozuelo de Alarcón 57977
## 2 Madrid Boadilla del Monte 46480
## 3 Madrid Alcobendas 43720
municipios |>
arrange(RentaMedia2021) |>
slice(1:3) |>
select(PROV_LITERAL, MUN_LITERAL, RentaMedia2021)
## # A tibble: 3 × 3
## PROV_LITERAL MUN_LITERAL RentaMedia2021
## <chr> <chr> <dbl>
## 1 Navarra Tudela 13443
## 2 Navarra 2001 <= 5000 13580
## 3 Navarra 5001 <= 10000 13771
The average percentage of young population in municipalities in 2021 is 36%, while in 2011 it was 39%. The number of young people has decreased.
The municipaliy with the highest young population is Vícar in Almería, probably due to the high presence of young agriculture workers.
mean(municipios$PorcentajeJoven2021)
## [1] 36.77195
mean(municipios$PorcentajeJoven2011)
## [1] 39.9888
municipios |>
arrange(desc(PorcentajeJoven2021)) |>
slice(1:3) |>
select(PROV_LITERAL, MUN_LITERAL, PorcentajeJoven2021)
## # A tibble: 3 × 3
## PROV_LITERAL MUN_LITERAL PorcentajeJoven2021
## <chr> <chr> <dbl>
## 1 Almería Vícar 51.1
## 2 Melilla Melilla 50.5
## 3 Almería Níjar 49.6
municipios |>
arrange(PorcentajeJoven2021) |>
slice(1:3) |>
select(PROV_LITERAL, MUN_LITERAL, PorcentajeJoven2021)
## # A tibble: 3 × 3
## PROV_LITERAL MUN_LITERAL PorcentajeJoven2021
## <chr> <chr> <dbl>
## 1 Ourense <=2000 19.9
## 2 León <=2000 20.8
## 3 Lugo <=2000 21.0
#- # Descriptive Tenure Regimes
# Ownership
top_propiedad_2011 <- propiedad %>%
arrange(desc(Propiedad11Mun)) %>%
distinct(Propiedad11Mun, .keep_all = TRUE) |>
select(Propiedad11Mun, MUN_LITERAL, PROV_LITERAL, CCAA)
# Purchase
top_compra_2011 <- propiedad %>%
arrange(desc(Compra11Mun)) %>%
distinct(Compra11Mun, .keep_all = TRUE) |>
select(Compra11Mun, MUN_LITERAL, PROV_LITERAL, CCAA)
# Inheritance
top_herencia_2011 <- propiedad %>%
arrange(desc(Herencia11Mun)) %>%
distinct(cusec, .keep_all = TRUE) |>
select(Herencia11Mun, MUN_LITERAL, PROV_LITERAL, CCAA)
# Rent
top_alquiler_2011 <- propiedad %>%
arrange(desc(Alquiler11Mun)) %>%
distinct(cusec, .keep_all = TRUE) |>
select(Alquiler11Mun, MUN_LITERAL, PROV_LITERAL, CCAA)
# Other
top_otro_2011 <- propiedad %>%
arrange(desc(Otro11Mun)) %>%
distinct(cusec, .keep_all = TRUE) |>
select(Otro11Mun, MUN_LITERAL, PROV_LITERAL, CCAA)
top_propiedad_2011
## # A tibble: 531 × 4
## Propiedad11Mun MUN_LITERAL PROV_LITERAL CCAA
## <dbl> <chr> <chr> <chr>
## 1 93.9 Almansa Albacete Castilla-La Mancha
## 2 92.5 2001 <= 5000 Zamora Castilla y León
## 3 91.8 Petrer Alicante/Alacant Comunidad Valenciana
## 4 91.7 Martos Jaén Andalucía
## 5 91.1 Ibi Alicante/Alacant Comunidad Valenciana
## 6 91.0 Vall d'Uixó, la Castellón/Castelló Comunidad Valenciana
## 7 90.8 Arroyomolinos Madrid Comunidad de Madrid
## 8 90.6 Paiporta Valencia/València Comunidad Valenciana
## 9 90.3 Riba-roja de Túria Valencia/València Comunidad Valenciana
## 10 90.2 Puerto Real Cádiz Andalucía
## # ℹ 521 more rows
top_compra_2011
## # A tibble: 531 × 4
## Compra11Mun MUN_LITERAL PROV_LITERAL CCAA
## <dbl> <chr> <chr> <chr>
## 1 92.5 2001 <= 5000 Zamora Castilla y León
## 2 90.8 Arroyomolinos Madrid Comunidad de Madrid
## 3 90.8 Almansa Albacete Castilla-La Mancha
## 4 90.1 Petrer Alicante/Alacant Comunidad Valenciana
## 5 90 Alcalá de Guadaíra Sevilla Andalucía
## 6 89.7 Paiporta Valencia/València Comunidad Valenciana
## 7 89.5 Martos Jaén Andalucía
## 8 89.3 Morón de la Frontera Sevilla Andalucía
## 9 89.3 Dos Hermanas Sevilla Andalucía
## 10 88.6 Ibi Alicante/Alacant Comunidad Valenciana
## # ℹ 521 more rows
top_herencia_2011
## # A tibble: 584 × 4
## Herencia11Mun MUN_LITERAL PROV_LITERAL CCAA
## <dbl> <chr> <chr> <chr>
## 1 40 2001 <= 5000 Palmas, Las Canarias
## 2 33.3 <=2000 Palmas, Las Canarias
## 3 25 <=2000 Santa Cruz de Tenerife Canarias
## 4 17.9 <=2000 Pontevedra Galicia
## 5 17.8 <=2000 Ourense Galicia
## 6 17.1 Icod de los Vinos Santa Cruz de Tenerife Canarias
## 7 13.8 Alhaurín el Grande Málaga Andalucía
## 8 12.9 5001 <= 10000 Albacete Castilla-La Mancha
## 9 12.4 Redondela Pontevedra Galicia
## 10 12.3 <=2000 Lugo Galicia
## # ℹ 574 more rows
top_alquiler_2011
## # A tibble: 584 × 4
## Alquiler11Mun MUN_LITERAL PROV_LITERAL CCAA
## <dbl> <chr> <chr> <chr>
## 1 53.1 Sitges Barcelona Cataluña
## 2 52.3 Barcelona Barcelona Cataluña
## 3 49.7 Sant Cugat del Vallès Barcelona Cataluña
## 4 48.8 Girona Girona Cataluña
## 5 48.4 Maó Balears, Illes Islas Baleares
## 6 48.2 Mogán Palmas, Las Canarias
## 7 47.4 Santiago de Compostela Coruña, A Galicia
## 8 46.7 Lloret de Mar Girona Cataluña
## 9 46.4 Eivissa Balears, Illes Islas Baleares
## 10 44.2 Madrid Madrid Comunidad de Madrid
## # ℹ 574 more rows
top_otro_2011
## # A tibble: 584 × 4
## Otro11Mun MUN_LITERAL PROV_LITERAL CCAA
## <dbl> <chr> <chr> <chr>
## 1 33.3 <=2000 Palmas, Las Canarias
## 2 31.8 <=2000 Santa Cruz de Tenerife Canarias
## 3 29.6 <=2000 Málaga Andalucía
## 4 28.6 <=2000 Pontevedra Galicia
## 5 27.8 <=2000 Cáceres Extremadura
## 6 27.6 <=2000 Huelva Andalucía
## 7 25.4 <=2000 Lugo Galicia
## 8 24.7 <=2000 Jaén Andalucía
## 9 23.8 <=2000 Badajoz Extremadura
## 10 23.1 <=2000 Coruña, A Galicia
## # ℹ 574 more rows
# Ownership
top_propiedad_2011 <- propiedad |>
distinct(CPRO, .keep_all = TRUE) |>
arrange(desc(Propiedad11Prov)) |>
select(Herencia11Prov, PROV_LITERAL, CCAA)
# Inheritance
top_herencia_2011 <- propiedad |>
distinct(CPRO, .keep_all = TRUE) |>
arrange(desc(Herencia11Prov)) |>
select(Herencia11Prov, PROV_LITERAL, CCAA)
# Rent
top_alquiler_2011 <- propiedad %>%
arrange(desc(Alquiler11Prov)) %>%
distinct(CPRO, .keep_all = TRUE) |>
select(Alquiler11Prov, PROV_LITERAL, CCAA)
# Other
top_otro_2011 <- propiedad %>%
arrange(desc(Otro11Prov)) %>%
distinct(CPRO, .keep_all = TRUE) |>
select(Otro11Prov, PROV_LITERAL, CCAA)
top_compra_2011
## # A tibble: 531 × 4
## Compra11Mun MUN_LITERAL PROV_LITERAL CCAA
## <dbl> <chr> <chr> <chr>
## 1 92.5 2001 <= 5000 Zamora Castilla y León
## 2 90.8 Arroyomolinos Madrid Comunidad de Madrid
## 3 90.8 Almansa Albacete Castilla-La Mancha
## 4 90.1 Petrer Alicante/Alacant Comunidad Valenciana
## 5 90 Alcalá de Guadaíra Sevilla Andalucía
## 6 89.7 Paiporta Valencia/València Comunidad Valenciana
## 7 89.5 Martos Jaén Andalucía
## 8 89.3 Morón de la Frontera Sevilla Andalucía
## 9 89.3 Dos Hermanas Sevilla Andalucía
## 10 88.6 Ibi Alicante/Alacant Comunidad Valenciana
## # ℹ 521 more rows
top_herencia_2011
## # A tibble: 52 × 3
## Herencia11Prov PROV_LITERAL CCAA
## <dbl> <chr> <chr>
## 1 8.08 Ourense Galicia
## 2 5.76 Pontevedra Galicia
## 3 5.73 Jaén Andalucía
## 4 5.71 Cuenca Castilla-La Mancha
## 5 5.55 Lugo Galicia
## 6 5.54 Teruel Aragón
## 7 5.38 Cáceres Extremadura
## 8 5.28 Santa Cruz de Tenerife Canarias
## 9 4.55 Albacete Castilla-La Mancha
## 10 4.55 Badajoz Extremadura
## # ℹ 42 more rows
top_alquiler_2011
## # A tibble: 52 × 3
## Alquiler11Prov PROV_LITERAL CCAA
## <dbl> <chr> <chr>
## 1 40.4 Melilla Melilla
## 2 32.6 Girona Cataluña
## 3 31.4 Balears, Illes Islas Baleares
## 4 31.0 Barcelona Cataluña
## 5 30.7 Madrid Comunidad de Madrid
## 6 30.3 Teruel Aragón
## 7 27.5 Lleida Cataluña
## 8 26.4 Palmas, Las Canarias
## 9 26.4 Ceuta Ceuta
## 10 25.6 Huesca Aragón
## # ℹ 42 more rows
top_otro_2011
## # A tibble: 52 × 3
## Otro11Prov PROV_LITERAL CCAA
## <dbl> <chr> <chr>
## 1 19.5 Cáceres Extremadura
## 2 15.1 Jaén Andalucía
## 3 15.1 Ourense Galicia
## 4 14.9 Pontevedra Galicia
## 5 14.9 Lugo Galicia
## 6 14.8 Zamora Castilla y León
## 7 14.6 Badajoz Extremadura
## 8 14.6 Ceuta Ceuta
## 9 14.2 Teruel Aragón
## 10 14.2 Ávila Castilla y León
## # ℹ 42 more rows
# Ownership
top_propiedad_2011 <- propiedad |>
distinct(CPRO, .keep_all = TRUE) |>
arrange(desc(Propiedad11CCAA)) |>
select(Herencia11Prov, PROV_LITERAL, CCAA)
# Inheritance
top_herencia_2011 <- propiedad |>
distinct(CCAA, .keep_all = TRUE) |>
arrange(desc(Herencia11CCAA)) |>
select(Herencia11CCAA, CCAA)
# Rent
top_alquiler_2011 <- propiedad %>%
arrange(desc(Alquiler11CCAA)) %>%
distinct(CCAA, .keep_all = TRUE) |>
select(Alquiler11CCAA, CCAA)
# Other
top_otro_2011 <- propiedad %>%
arrange(desc(Otro11CCAA)) %>%
distinct(CCAA, .keep_all = TRUE) |>
select(Otro11CCAA, CCAA)
top_propiedad_2011
## # A tibble: 52 × 3
## Herencia11Prov PROV_LITERAL CCAA
## <dbl> <chr> <chr>
## 1 2.88 Alicante/Alacant Comunidad Valenciana
## 2 4.33 Castellón/Castelló Comunidad Valenciana
## 3 3.58 Valencia/València Comunidad Valenciana
## 4 1.93 Cantabria Cantabria
## 5 3.12 Murcia Comunidad de Murcia
## 6 1.26 Araba/Álava País Vasco
## 7 2.54 Gipuzkoa País Vasco
## 8 2.6 Bizkaia País Vasco
## 9 2.85 Almería Andalucía
## 10 2.91 Cádiz Andalucía
## # ℹ 42 more rows
top_compra_2011
## # A tibble: 531 × 4
## Compra11Mun MUN_LITERAL PROV_LITERAL CCAA
## <dbl> <chr> <chr> <chr>
## 1 92.5 2001 <= 5000 Zamora Castilla y León
## 2 90.8 Arroyomolinos Madrid Comunidad de Madrid
## 3 90.8 Almansa Albacete Castilla-La Mancha
## 4 90.1 Petrer Alicante/Alacant Comunidad Valenciana
## 5 90 Alcalá de Guadaíra Sevilla Andalucía
## 6 89.7 Paiporta Valencia/València Comunidad Valenciana
## 7 89.5 Martos Jaén Andalucía
## 8 89.3 Morón de la Frontera Sevilla Andalucía
## 9 89.3 Dos Hermanas Sevilla Andalucía
## 10 88.6 Ibi Alicante/Alacant Comunidad Valenciana
## # ℹ 521 more rows
top_herencia_2011
## # A tibble: 19 × 2
## Herencia11CCAA CCAA
## <dbl> <chr>
## 1 5.35 Galicia
## 2 4.88 Extremadura
## 3 4.07 Canarias
## 4 3.64 Islas Baleares
## 5 3.57 Andalucía
## 6 3.46 Comunidad Valenciana
## 7 3.12 Comunidad de Murcia
## 8 3.07 Asturias
## 9 3.02 Castilla-La Mancha
## 10 3 Aragón
## 11 2.74 Comunidad Foral de Navarra
## 12 2.38 Castilla y León
## 13 2.32 País Vasco
## 14 1.93 Cantabria
## 15 1.9 Cataluña
## 16 1.62 La Rioja
## 17 1.52 Ceuta
## 18 1.39 Melilla
## 19 1.26 Comunidad de Madrid
top_alquiler_2011
## # A tibble: 19 × 2
## Alquiler11CCAA CCAA
## <dbl> <chr>
## 1 40.4 Melilla
## 2 31.4 Islas Baleares
## 3 30.7 Comunidad de Madrid
## 4 29.8 Cataluña
## 5 26.4 Ceuta
## 6 25.4 Canarias
## 7 25.3 Asturias
## 8 23.8 Galicia
## 9 22.7 Aragón
## 10 19.5 La Rioja
## 11 17.0 Castilla y León
## 12 16.9 Comunidad Foral de Navarra
## 13 16.9 País Vasco
## 14 16.7 Castilla-La Mancha
## 15 15.6 Comunidad de Murcia
## 16 14.4 Cantabria
## 17 14.2 Comunidad Valenciana
## 18 13.5 Extremadura
## 19 12.7 Andalucía
top_otro_2011
## # A tibble: 19 × 2
## Otro11CCAA CCAA
## <dbl> <chr>
## 1 16.5 Extremadura
## 2 14.6 Ceuta
## 3 14.3 Galicia
## 4 11.6 Canarias
## 5 11.4 Andalucía
## 6 11.0 Castilla y León
## 7 10.5 Islas Baleares
## 8 9.77 Aragón
## 9 9.74 Asturias
## 10 9.16 Castilla-La Mancha
## 11 8.34 Cantabria
## 12 8.11 Comunidad Valenciana
## 13 7.88 Comunidad Foral de Navarra
## 14 7.74 La Rioja
## 15 7.28 Comunidad de Murcia
## 16 7.24 Melilla
## 17 6.99 Cataluña
## 18 6.66 País Vasco
## 19 6.15 Comunidad de Madrid
# Ownership
top_propiedad_2021 <- propiedad |>
distinct(cusec, .keep_all = TRUE) |>
arrange(desc(Propiedad21Mun)) |>
select(Propiedad21Mun, MUN_LITERAL, PROV_LITERAL, CCAA)
# Rent
top_alquiler_2021 <- propiedad |>
distinct(cusec, .keep_all = TRUE) |>
arrange(desc(Alquiler21Mun)) |>
select(Alquiler21Mun, MUN_LITERAL, PROV_LITERAL, CCAA)
# Other
top_otro_2021 <- propiedad |>
distinct(cusec, .keep_all = TRUE) |>
arrange(desc(Otro21Mun)) |>
select(Propiedad21Mun, MUN_LITERAL, PROV_LITERAL, CCAA)
top_propiedad_2021
## # A tibble: 584 × 4
## Propiedad21Mun MUN_LITERAL PROV_LITERAL CCAA
## <dbl> <chr> <chr> <chr>
## 1 90.9 Martos Jaén Andalucía
## 2 87.0 Andújar Jaén Andalucía
## 3 86.7 5001 <= 10000 Burgos Castilla y León
## 4 83.9 2001 <= 5000 Salamanca Castilla y León
## 5 81.6 Lebrija Sevilla Andalucía
## 6 81.2 2001 <= 5000 Zamora Castilla y León
## 7 80.1 <=2000 Jaén Andalucía
## 8 80 Úbeda Jaén Andalucía
## 9 79.5 10001 <= 20000 Jaén Andalucía
## 10 79.1 Alhaurín de la Torre Málaga Andalucía
## # ℹ 574 more rows
top_alquiler_2021
## # A tibble: 584 × 4
## Alquiler21Mun MUN_LITERAL PROV_LITERAL CCAA
## <dbl> <chr> <chr> <chr>
## 1 57.9 Santiago de Compostela Coruña, A Galicia
## 2 57.7 Girona Girona Cataluña
## 3 56.5 Barcelona Barcelona Cataluña
## 4 54.5 Vilanova i la Geltrú Barcelona Cataluña
## 5 53.6 Manresa Barcelona Cataluña
## 6 52.9 Ponferrada León Castilla y León
## 7 52.8 Granollers Barcelona Cataluña
## 8 52.7 Hospitalet de Llobregat, L' Barcelona Cataluña
## 9 52.3 Madrid Madrid Comunidad de Madrid
## 10 51.7 Mollet del Vallès Barcelona Cataluña
## # ℹ 574 more rows
top_otro_2021
## # A tibble: 584 × 4
## Propiedad21Mun MUN_LITERAL PROV_LITERAL CCAA
## <dbl> <chr> <chr> <chr>
## 1 20 2001 <= 5000 Palmas, Las Canarias
## 2 23.5 <=2000 Palmas, Las Canarias
## 3 25.6 <=2000 Santa Cruz de Tenerife Canarias
## 4 44.4 5001 <= 10000 Teruel Aragón
## 5 43.8 <=2000 Pontevedra Galicia
## 6 40.8 Oleiros Coruña, A Galicia
## 7 54.2 10001 <= 20000 Pontevedra Galicia
## 8 39.3 Teguise Palmas, Las Canarias
## 9 60.1 2001 <= 5000 Pontevedra Galicia
## 10 41.2 <=2000 Balears, Illes Islas Baleares
## # ℹ 574 more rows
# Ownership
top_propiedad_2021 <- propiedad |>
distinct(CPRO, .keep_all = TRUE) |>
arrange(desc(Propiedad21Prov)) |>
select(Propiedad21Prov, PROV_LITERAL, CCAA)
# Rent
top_alquiler_2021 <- propiedad |>
distinct(CPRO, .keep_all = TRUE) |>
arrange(desc(Alquiler21Prov)) |>
select(Alquiler21Prov, PROV_LITERAL, CCAA)
# Other
top_otro_2021 <- propiedad |>
distinct(CPRO, .keep_all = TRUE) |>
arrange(desc(Otro21Prov)) |>
select(Propiedad21Mun, PROV_LITERAL, CCAA)
top_propiedad_2021
## # A tibble: 52 × 3
## Propiedad21Prov PROV_LITERAL CCAA
## <dbl> <chr> <chr>
## 1 77.0 Jaén Andalucía
## 2 69.2 Málaga Andalucía
## 3 68.2 Córdoba Andalucía
## 4 68 Cádiz Andalucía
## 5 68.0 Sevilla Andalucía
## 6 67.9 Ciudad Real Castilla-La Mancha
## 7 67.3 Huelva Andalucía
## 8 66 Murcia Comunidad de Murcia
## 9 65.4 Albacete Castilla-La Mancha
## 10 64.1 Badajoz Extremadura
## # ℹ 42 more rows
top_alquiler_2021
## # A tibble: 52 × 3
## Alquiler21Prov PROV_LITERAL CCAA
## <dbl> <chr> <chr>
## 1 46.4 Melilla Melilla
## 2 46.0 Barcelona Cataluña
## 3 45.4 Madrid Comunidad de Madrid
## 4 42.6 Girona Cataluña
## 5 41.2 Araba/Álava País Vasco
## 6 39.9 Lleida Cataluña
## 7 38.2 Gipuzkoa País Vasco
## 8 38.0 Balears, Illes Islas Baleares
## 9 36.8 Zaragoza Aragón
## 10 36.8 Bizkaia País Vasco
## # ℹ 42 more rows
top_otro_2021
## # A tibble: 52 × 3
## Propiedad21Mun PROV_LITERAL CCAA
## <dbl> <chr> <chr>
## 1 65.3 Badajoz Extremadura
## 2 60.1 Pontevedra Galicia
## 3 56.8 Teruel Aragón
## 4 61.7 Coruña, A Galicia
## 5 44.2 Palmas, Las Canarias
## 6 71.3 Ourense Galicia
## 7 67.7 Lugo Galicia
## 8 58.5 Asturias Asturias
## 9 63.7 Córdoba Andalucía
## 10 69.3 Cantabria Cantabria
## # ℹ 42 more rows
# Ownership
top_propiedad_2021 <- propiedad |>
distinct(CCAA, .keep_all = TRUE) |>
arrange(desc(Propiedad21CCAA)) |>
select(Propiedad21CCAA, CCAA)
# Rent
top_propiedad_2021 <- propiedad |>
distinct(CCAA, .keep_all = TRUE) |>
arrange(desc(Alquiler21CCAA)) |>
select(Alquiler21CCAA, CCAA)
# Other
top_propiedad_2021 <- propiedad |>
distinct(CCAA, .keep_all = TRUE) |>
arrange(desc(Otro21CCAA)) |>
select(Otro21CCAA, CCAA)
top_propiedad_2021
## # A tibble: 19 × 2
## Otro21CCAA CCAA
## <dbl> <chr>
## 1 20.5 Galicia
## 2 20.1 Extremadura
## 3 18.2 Asturias
## 4 17.9 Canarias
## 5 17.1 Cantabria
## 6 15.4 Ceuta
## 7 14.4 Castilla y León
## 8 14.1 Islas Baleares
## 9 14.0 Andalucía
## 10 13.9 Comunidad Foral de Navarra
## 11 13.7 Castilla-La Mancha
## 12 11.9 Comunidad Valenciana
## 13 11.8 Aragón
## 14 11.4 Cataluña
## 15 11.2 La Rioja
## 16 10.0 Comunidad de Murcia
## 17 7.75 Comunidad de Madrid
## 18 6.8 Melilla
## 19 4.96 País Vasco
top_alquiler_2021
## # A tibble: 52 × 3
## Alquiler21Prov PROV_LITERAL CCAA
## <dbl> <chr> <chr>
## 1 46.4 Melilla Melilla
## 2 46.0 Barcelona Cataluña
## 3 45.4 Madrid Comunidad de Madrid
## 4 42.6 Girona Cataluña
## 5 41.2 Araba/Álava País Vasco
## 6 39.9 Lleida Cataluña
## 7 38.2 Gipuzkoa País Vasco
## 8 38.0 Balears, Illes Islas Baleares
## 9 36.8 Zaragoza Aragón
## 10 36.8 Bizkaia País Vasco
## # ℹ 42 more rows
top_otro_2021
## # A tibble: 52 × 3
## Propiedad21Mun PROV_LITERAL CCAA
## <dbl> <chr> <chr>
## 1 65.3 Badajoz Extremadura
## 2 60.1 Pontevedra Galicia
## 3 56.8 Teruel Aragón
## 4 61.7 Coruña, A Galicia
## 5 44.2 Palmas, Las Canarias
## 6 71.3 Ourense Galicia
## 7 67.7 Lugo Galicia
## 8 58.5 Asturias Asturias
## 9 63.7 Córdoba Andalucía
## 10 69.3 Cantabria Cantabria
## # ℹ 42 more rows
# Propiedad
top_propiedad_dif <- propiedad |>
distinct(cusec, .keep_all = TRUE) |>
arrange(desc(PropiedadDifMun)) |>
select(MUN_LITERAL, PROV_LITERAL, PropiedadDifMun, AlquilerDifMun, OtroDifMun, Herencia11Mun) |>
filter(PropiedadDifMun > 5)
# Alquiler
top_alquiler_dif <- propiedad |>
distinct(cusec, .keep_all = TRUE) |>
arrange(desc(AlquilerDifMun)) |>
select(Alquiler21Mun, MUN_LITERAL, PROV_LITERAL, CCAA)
# Otro
top_otro_dif <- propiedad |>
distinct(cusec, .keep_all = TRUE) |>
arrange(desc(OtroDifMun)) |>
select(Propiedad21Mun, MUN_LITERAL, PROV_LITERAL, CCAA)
top_propiedad_dif
## # A tibble: 23 × 6
## MUN_LITERAL PROV_LITERAL PropiedadDifMun AlquilerDifMun OtroDifMun
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 Nerja Málaga 25.8 -28 2.23
## 2 <=2000 Jaén 13.8 -1.24 -12.5
## 3 Úbeda Jaén 13.5 -10.4 -3.15
## 4 2001 <= 5000 Huesca 11.6 -14.2 2.62
## 5 Alhama de Murcia Murcia 11.4 -7 -4.43
## 6 5001 <= 10000 Burgos 9.83 -12.9 3.07
## 7 <=2000 Lugo 9.78 -10.2 0.450
## 8 <=2000 Murcia 9.27 4.85 -14.1
## 9 Sant Josep de sa Tala… Balears, Il… 9.2 -8.54 -0.65
## 10 Maó Balears, Il… 8.32 -5.82 -2.51
## # ℹ 13 more rows
## # ℹ 1 more variable: Herencia11Mun <dbl>
top_alquiler_dif
## # A tibble: 584 × 4
## Alquiler21Mun MUN_LITERAL PROV_LITERAL CCAA
## <dbl> <chr> <chr> <chr>
## 1 47.0 2001 <= 5000 Araba/Álava País Vasco
## 2 43.2 <=2000 Araba/Álava País Vasco
## 3 44.6 Alfafar Valencia/València Comunidad Valenciana
## 4 51.7 Mollet del Vallès Barcelona Cataluña
## 5 37.0 Picassent Valencia/València Comunidad Valenciana
## 6 36.5 Catarroja Valencia/València Comunidad Valenciana
## 7 52.9 Ponferrada León Castilla y León
## 8 35.8 Alaquàs Valencia/València Comunidad Valenciana
## 9 43.1 Valdemoro Madrid Comunidad de Madrid
## 10 37.8 Barberà del Vallès Barcelona Cataluña
## # ℹ 574 more rows
top_otro_dif
## # A tibble: 584 × 4
## Propiedad21Mun MUN_LITERAL PROV_LITERAL CCAA
## <dbl> <chr> <chr> <chr>
## 1 20 2001 <= 5000 Palmas, Las Canarias
## 2 39.3 Teguise Palmas, Las Canarias
## 3 40.8 Oleiros Coruña, A Galicia
## 4 44.4 5001 <= 10000 Teruel Aragón
## 5 23.5 <=2000 Palmas, Las Canarias
## 6 25.6 <=2000 Santa Cruz de Tenerife Canarias
## 7 63.1 Don Benito Badajoz Extremadura
## 8 81.2 2001 <= 5000 Zamora Castilla y León
## 9 64.0 Villanueva de la Serena Badajoz Extremadura
## 10 58.5 Estrada, A Pontevedra Galicia
## # ℹ 574 more rows
# Save top dif table
ft <- flextable(top_propiedad_dif)
ft <- autofit(ft)
save_as_image(x = ft, path = paste0(getwd(), "/tablatopdif.png"))
## [1] "C:/Users/alici/OneDrive/Documentos/Clase/Máster/Bcol/tablatopdif.png"
# Save bot dif table
bot_propiedad_dif <- propiedad |>
distinct(cusec, .keep_all = TRUE) |>
arrange(PropiedadDifMun) |>
select(MUN_LITERAL, PROV_LITERAL, PropiedadDifMun, AlquilerDifMun, OtroDifMun) |>
filter(PropiedadDifMun < -28)
ft <- flextable(bot_propiedad_dif)
ft <- autofit(ft)
save_as_image(x = ft, path = paste0(getwd(), "/tablabotdif.png"))
## [1] "C:/Users/alici/OneDrive/Documentos/Clase/Máster/Bcol/tablabotdif.png"
# Graphs
ggplot(top_propiedad_dif, aes(x = reorder(MUN_LITERAL, PropiedadDifMun), y = PropiedadDifMun, fill = "Propiedad")) +
geom_bar(stat = "identity", position = "dodge") +
geom_bar(aes(y = AlquilerDifMun, fill = "Alquiler"), stat = "identity", position = "dodge") +
geom_bar(aes(y = OtroDifMun, fill = "Otro"), stat = "identity", position = "dodge") +
scale_fill_manual(values = c("Propiedad" = "skyblue2", "Alquiler" = "green3", "Otro" = "red2")) +
labs(title = "Cambios en Propiedad, Alquiler y Otro por Municipio",
x = "Municipio",
y = "Cambios",
fill = "Tipo de Cambio") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
guides(fill = guide_legend(title = "Tipo de Cambio")) +
coord_flip()+
theme_minimal()
ggplot(bot_propiedad_dif, aes(x = reorder(MUN_LITERAL, PropiedadDifMun), y = PropiedadDifMun, fill = "Propiedad")) +
geom_bar(stat = "identity", position = "dodge") +
geom_bar(aes(y = AlquilerDifMun, fill = "Alquiler"), stat = "identity", position = "dodge") +
geom_bar(aes(y = OtroDifMun, fill = "Otro"), stat = "identity", position = "dodge") +
scale_fill_manual(values = c("Propiedad" = "skyblue2", "Alquiler" = "green3", "Otro" = "red2")) +
labs(title = "Cambios en Propiedad, Alquiler y Otro por Municipio",
x = "Municipio",
y = "Cambios",
fill = "Tipo de Cambio") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
guides(fill = guide_legend(title = "Tipo de Cambio")) +
coord_flip()+
theme_minimal()
library(mapSpain)
esp_can <- esp_get_country()
can_prov <- esp_get_can_provinces()
can_box <- esp_get_can_box()
munic <- esp_get_munic()
provic <- esp_get_prov() |>
mutate(CPRO = cpro)
# Ownership
mapa <- propiedad |>
select(CPRO, Propiedad11Prov) |>
distinct(CPRO, .keep_all = TRUE)
mapa <- provic |>
left_join(mapa, by = "CPRO")
p1 <- ggplot(esp_can) +
geom_sf(color = "black", size = 0.7) +
geom_sf(data = can_prov) +
geom_sf(data = can_box) +
geom_sf(data = mapa, aes(fill = Propiedad11Prov), color = "black", size = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf, alpha = 0.2, fill = "white") +
geom_sf_text(data = mapa, aes(label = round(Propiedad11Prov, 1), color = ifelse(Propiedad11Prov > 75, "white", "black")),
size = 2.4, check_overlap = TRUE, nudge_y = -0.03, family = "Montserrat") +
theme_void() +
labs(title = "% of Young People in 'Property' Residence Status in 2011") +
theme(
legend.position = "none",
plot.title = element_text(family = "Calibri", size = 14),
text = element_text(family = "Calibri"),
plot.background = element_rect(fill = "white", color = NA),
panel.background = element_rect(fill = "white", color = NA)
) +
coord_sf() +
scale_fill_viridis_c(option = "C", direction = -1) +
scale_color_identity()
# Inheritance or donation
mapa <- propiedad |>
select(CPRO, Herencia11Prov) |>
distinct(CPRO, .keep_all = TRUE)
mapa <- provic |>
left_join(mapa, by = "CPRO")
p2 <- ggplot(esp_can) +
geom_sf(color = "black", size = 0.7) +
geom_sf(data = can_prov) +
geom_sf(data = can_box) +
geom_sf(data = mapa, aes(fill = Herencia11Prov), color = "black", size = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf, alpha = 0.2, fill = "white") +
geom_sf_text(data = mapa, aes(label = round(Herencia11Prov, 1)),
color = "black", size = 2.4, check_overlap = TRUE, nudge_y = -0.03, family = "Montserrat") +
theme_void() +
labs(title = "% of Young People in 'Inheritance/Donation' Residence Status in 2011") +
theme(
legend.position = "none",
plot.title = element_text(family = "Calibri", size = 14),
text = element_text(family = "Calibri"),
plot.background = element_rect(fill = "white", color = NA),
panel.background = element_rect(fill = "white", color = NA)
) +
coord_sf() +
scale_fill_viridis_c(option = "C", direction = -1)
# Rent
mapa <- propiedad |>
select(CPRO, Alquiler11Prov) |>
distinct(CPRO, .keep_all = TRUE)
mapa <- provic |>
left_join(mapa, by = "CPRO")
p3 <- ggplot(esp_can) +
geom_sf(color = "black", size = 0.7) +
geom_sf(data = can_prov) +
geom_sf(data = can_box) +
geom_sf(data = mapa, aes(fill = Alquiler11Prov), color = "black", size = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf, alpha = 0.2, fill = "white") +
geom_sf_text(data = mapa, aes(label = round(Alquiler11Prov, 1)),
color = "black", size = 2.4, check_overlap = TRUE, nudge_y = -0.03, family = "Montserrat") +
theme_void() +
labs(title = "% of Young People in 'Rent' Residence Status in 2011") +
theme(
legend.position = "none",
plot.title = element_text(family = "Calibri", size = 14),
text = element_text(family = "Calibri"),
plot.background = element_rect(fill = "white", color = NA),
panel.background = element_rect(fill = "white", color = NA)
) +
coord_sf() +
scale_fill_viridis_c(option = "C", direction = -1)
# Other
mapa <- propiedad |>
select(CPRO, Otro11Prov) |>
distinct(CPRO, .keep_all = TRUE)
mapa <- provic |>
left_join(mapa, by = "CPRO")
p4 <- ggplot(esp_can) +
geom_sf(color = "black", size = 0.7) +
geom_sf(data = can_prov) +
geom_sf(data = can_box) +
geom_sf(data = mapa, aes(fill = Otro11Prov), color = "black", size = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf, alpha = 0.2, fill = "white") +
geom_sf_text(data = mapa, aes(label = round(Otro11Prov, 1)),
color = "black", size = 2.4, check_overlap = TRUE, nudge_y = -0.03, family = "Montserrat") +
theme_void() +
labs(title = "% of Young People in 'Other' Residence Status in 2011") +
theme(
legend.position = "none",
plot.title = element_text(family = "Times New Roman", size = 14),
text = element_text(family = "Times New Roman"),
plot.background = element_rect(fill = "white", color = NA),
panel.background = element_rect(fill = "white", color = NA)
) +
coord_sf() +
scale_fill_viridis_c(option = "C", direction = -1)
# Save as image together
p1 <- p1 + theme(plot.margin = margin(20, 0, 5, 0))
p2 <- p2 + theme(plot.margin = margin(20, 0, 5, 0))
p3 <- p3 + theme(plot.margin = margin(5, 0, 5, 0))
p4 <- p4 + theme(plot.margin = margin(5, 0, 5, 0))
# Add individual titles to each plot
p1 <- p1 + labs(title = "Ownership") +
theme(plot.title = element_text(hjust = 0.6, family = "Times New Roman", size = 14))
p2 <- p2 + labs(title = "Inheritance", family = "Times New roman") +
theme(plot.title = element_text(hjust = 0.6, family = "Times New Roman", size = 14))
p3 <- p3 + labs(title = "Rent") +
theme(plot.title = element_text(hjust = 0.6, family = "Times New Roman", size = 14))
p4 <- p4 + labs(title = "Other") +
theme(plot.title = element_text(hjust = 0.6, family = "Times New Roman", size = 14))
# Add a general title
combined_plot <- ggarrange(p1, p2, p3, p4, ncol = 2, nrow = 2, common.legend = FALSE) +
ggtitle("% of Young people per Tenure Regime in 2011") +
theme(plot.title = element_text(hjust = 0.5, size = 18, family = "Times New Roman", face = "bold")) # Centramos el título general
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
combined_plot <- annotate_figure(combined_plot, bottom = text_grob("Source: Self-elaborated from Census data", hjust = 0, x = 0, size = 14, color = "#555555", family = "Times New Roman"))
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
# Save as image
#ggsave("2011.png", plot = combined_plot, width = 12, height = 8, bg = "white")
combined_plot
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
library(mapSpain)
esp_can <- esp_get_country()
can_prov <- esp_get_can_provinces()
can_box <- esp_get_can_box()
munic <- esp_get_munic()
ccaa <- esp_get_ccaa()
ccaa <- ccaa |>
mutate(CCAA = ine.ccaa.name)
ccaa <- ccaa |>
mutate(CCAA = ine.ccaa.name) |>
mutate(CCAA = case_when(
CCAA == "Asturias, Principado de" ~ "Asturias",
CCAA == "Balears, Illes" ~ "Islas Baleares",
CCAA == "Castilla - La Mancha" ~ "Castilla-La Mancha",
CCAA == "Comunitat Valenciana" ~ "Comunidad Valenciana",
CCAA == "Madrid, Comunidad de" ~ "Comunidad de Madrid",
CCAA == "Murcia, Región de" ~ "Comunidad de Murcia",
CCAA == "Navarra, Comunidad Foral de" ~ "Comunidad Foral de Navarra",
CCAA == "Rioja, La" ~ "La Rioja",
TRUE ~ CCAA
))
# Ownership
mapa <- propiedad |>
select(CCAA, Propiedad11CCAA) |>
distinct(CCAA, .keep_all = TRUE)
mapa <- ccaa |>
left_join(mapa, by = "CCAA")
p1 <- ggplot(esp_can) +
geom_sf(color = "black", size = 0.7) +
geom_sf(data = can_prov) +
geom_sf(data = can_box) +
geom_sf(data = mapa, aes(fill = Propiedad11CCAA), color = "black", size = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf, alpha = 0.2, fill = "white") +
geom_sf_text(data = mapa, aes(label = round(Propiedad11CCAA, 1), color = ifelse(Propiedad11CCAA > 75, "white", "black")),
size = 2.4, check_overlap = TRUE, nudge_y = -0.03) +
theme_void() +
labs(title = "% of Young People in 'Property' Residence Status in 2011") +
theme(
legend.position = "none",
plot.title = element_text(family = "Calibri", size = 14),
text = element_text(family = "Calibri"),
plot.background = element_rect(fill = "white", color = NA), # Fondo blanco para la figura
panel.background = element_rect(fill = "white", color = NA) # Fondo blanco para el panel
) +
coord_sf() +
scale_fill_viridis_c(option = "C", direction = -1) +
scale_color_identity()
# Herencia o donación
mapa <- propiedad |>
select(CCAA, Herencia11CCAA) |>
distinct(CCAA, .keep_all = TRUE)
mapa <- ccaa |>
left_join(mapa, by = "CCAA")
p2 <- ggplot(esp_can) +
geom_sf(color = "black", size = 0.7) +
geom_sf(data = can_prov) +
geom_sf(data = can_box) +
geom_sf(data = mapa, aes(fill = Herencia11CCAA), color = "black", size = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf, alpha = 0.2, fill = "white") +
geom_sf_text(data = mapa, aes(label = round(Herencia11CCAA, 1)),
color = "black", size = 2.4, check_overlap = TRUE, nudge_y = -0.03) +
theme_void() +
labs(title = "% of Young People in 'Inheritance/Donation' Residence Status in 2011") +
theme(
legend.position = "none",
plot.title = element_text(family = "Calibri", size = 14),
text = element_text(family = "Calibri"),
plot.background = element_rect(fill = "white", color = NA),
panel.background = element_rect(fill = "white", color = NA)
) +
coord_sf() +
scale_fill_viridis_c(option = "C", direction = -1)
# Rent
mapa <- propiedad |>
select(CCAA, Alquiler11CCAA) |>
distinct(CCAA, .keep_all = TRUE)
mapa <- ccaa |>
left_join(mapa, by = "CCAA")
p3 <- ggplot(esp_can) +
geom_sf(color = "black", size = 0.7) +
geom_sf(data = can_prov) +
geom_sf(data = can_box) +
geom_sf(data = mapa, aes(fill = Alquiler11CCAA), color = "black", size = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf, alpha = 0.2, fill = "white") +
geom_sf_text(data = mapa, aes(label = round(Alquiler11CCAA, 1)),
color = "black", size = 2.4, check_overlap = TRUE, nudge_y = -0.03) +
theme_void() +
labs(title = "% of Young People in 'Rent' Residence Status in 2011") +
theme(
legend.position = "none",
plot.title = element_text(family = "Calibri", size = 14),
text = element_text(family = "Calibri"),
plot.background = element_rect(fill = "white", color = NA),
panel.background = element_rect(fill = "white", color = NA)
) +
coord_sf() +
scale_fill_viridis_c(option = "C", direction = -1)
# Other
mapa <- propiedad |>
select(CCAA, Otro11CCAA) |>
distinct(CCAA, .keep_all = TRUE)
mapa <- ccaa |>
left_join(mapa, by = "CCAA")
p4 <- ggplot(esp_can) +
geom_sf(color = "black", size = 0.7) +
geom_sf(data = can_prov) +
geom_sf(data = can_box) +
geom_sf(data = mapa, aes(fill = Otro11CCAA), color = "black", size = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf, alpha = 0.2, fill = "white") +
geom_sf_text(data = mapa, aes(label = round(Otro11CCAA, 1)),
color = "black", size = 2.4, check_overlap = TRUE, nudge_y = -0.03) +
theme_void() +
labs(title = "% of Young People in 'Other' Residence Status in 2011") +
theme(
legend.position = "none",
plot.title = element_text(family = "Calibri", size = 14),
text = element_text(family = "Calibri"),
plot.background = element_rect(fill = "white", color = NA),
panel.background = element_rect(fill = "white", color = NA)
) +
coord_sf() +
scale_fill_viridis_c(option = "C", direction = -1)
p1 <- p1 + theme(plot.margin = margin(5, 5, 5, 5))
p2 <- p2 + theme(plot.margin = margin(5, 5, 5, 5))
p3 <- p3 + theme(plot.margin = margin(5, 5, 5, 5))
p4 <- p4 + theme(plot.margin = margin(5, 5, 5, 5))
combined_plot <- ggarrange(p1, p2, p3, p4, ncol = 2, nrow = 2)
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Calibri' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Calibri' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Calibri' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Calibri' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Calibri' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Calibri' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Calibri' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Calibri' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Calibri' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Calibri' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Calibri' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Calibri' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Calibri' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Calibri' not found in PostScript font database
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
library(mapSpain)
esp_can <- esp_get_country()
can_prov <- esp_get_can_provinces()
can_box <- esp_get_can_box()
munic <- esp_get_munic()
provic <- esp_get_prov() |>
mutate(CPRO = cpro)
mapa <- propiedad |>
select(CPRO, Propiedad21Prov) |>
distinct(CPRO, .keep_all = TRUE)
mapa <- provic |>
left_join(mapa, by = "CPRO")
# Ownership
p5 <- ggplot(esp_can) +
geom_sf(color = "black", size = 0.7) +
geom_sf(data = can_prov) +
geom_sf(data = can_box) +
geom_sf(data = mapa, aes(fill = Propiedad21Prov), color = "black", size = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf, alpha = 0.2, fill = "white") +
geom_sf_text(data = mapa, aes(label = round(Propiedad21Prov, 1), color = ifelse(Propiedad21Prov > 75, "white", "black")),
size = 2.4, check_overlap = TRUE, nudge_y = -0.03, family = "Montserrat") +
theme_void() +
labs(title = "% of Young People in 'Property' Residence Status in 2021") +
theme(
legend.position = "none",
plot.title = element_text(family = "Calibri", size = 14),
text = element_text(family = "Calibri"),
plot.background = element_rect(fill = "white", color = NA),
panel.background = element_rect(fill = "white", color = NA)
) +
coord_sf() +
scale_fill_viridis_c(option = "C", direction = -1) +
scale_color_identity()
# Rent
mapa <- propiedad |>
select(CPRO, Alquiler21Prov) |>
distinct(CPRO, .keep_all = TRUE)
mapa <- provic |>
left_join(mapa, by = "CPRO")
p6 <- ggplot(esp_can) +
geom_sf(color = "black", size = 0.7) +
geom_sf(data = can_prov) +
geom_sf(data = can_box) +
geom_sf(data = mapa, aes(fill = Alquiler21Prov), color = "black", size = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf, alpha = 0.2, fill = "white") +
geom_sf_text(data = mapa, aes(label = round(Alquiler21Prov, 1),
color = ifelse(Alquiler21Prov > 38, "white", "black")),
size = 2.4, check_overlap = TRUE, nudge_y = -0.03, family = "Montserrat") +
theme_void() +
labs(title = "% of Young People in 'Rent' Residence Status in 2021") +
theme(
legend.position = "none",
plot.title = element_text(family = "Calibri", size = 14),
text = element_text(family = "Calibri")) +
coord_sf() +
scale_fill_viridis_c(option = "C", direction = -1) +
scale_color_identity()
# Other
mapa <- propiedad |>
select(CPRO, Otro21Prov) |>
distinct(CPRO, .keep_all = TRUE)
mapa <- provic |>
left_join(mapa, by = "CPRO")
p7 <-ggplot(esp_can) +
geom_sf(color = "black", size = 0.7) +
geom_sf(data = can_prov) +
geom_sf(data = can_box) +
geom_sf(data = mapa, aes(fill = Otro21Prov), color = "black", size = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf, alpha = 0.2, fill = "white") +
geom_sf_text(data = mapa, aes(label = round(Otro21Prov, 1), color = ifelse(Otro21Prov > 19, "white", "black")), size = 2.4, check_overlap = TRUE, nudge_y = -0.03, family = "Montserrat") +
theme_void() +
labs(title = "% of Young People in 'Other' Residence Status in 2021") +
theme(
legend.position = "none",
plot.title = element_text(family = "Calibri", size = 14),
text = element_text(family = "Calibri")) +
coord_sf() +
scale_fill_viridis_c(option = "C", direction = -1) +
scale_color_identity()
# Save as image together
p5 <- p5 + theme(plot.margin = margin(20, 0, 5, 0))
p6 <- p6 + theme(plot.margin = margin(20, 0, 5, 0))
p7 <- p7 + theme(plot.margin = margin(5, 0, 5, 0))
# Add individual titles
p5 <- p5 + labs(title = "Ownership") +
theme(plot.title = element_text(hjust = 0.6, family = "Times New Roman", size = 14))
p6 <- p6 + labs(title = "Rent", family = "Times New roman") +
theme(plot.title = element_text(hjust = 0.6, family = "Times New Roman", size = 14))
p7 <- p7 + labs(title = "Other") +
theme(plot.title = element_text(hjust = 0.6, family = "Times New Roman", size = 14))
# Add general title
combined_plot <- ggarrange(p5, p6, p7, ncol = 2, nrow = 2, common.legend = FALSE) +
ggtitle("% of Young people per Tenure Regime in 2021") +
theme(plot.title = element_text(hjust = 0.5, size = 18, family = "Times New Roman", face = "bold"))
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
combined_plot <- annotate_figure(combined_plot, bottom = text_grob("Source: Self-elaborated from Census data", hjust = 0, x = 0, size = 14, color = "#555555", family = "Times New Roman"))
# Save as image
#ggsave("2021.png", plot = combined_plot, width = 12, height = 8, bg = "white")
combined_plot
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
library(mapSpain)
esp_can <- esp_get_country()
can_prov <- esp_get_can_provinces()
can_box <- esp_get_can_box()
munic <- esp_get_munic()
munic <- munic |>
mutate(cusec = LAU_CODE) |>
select(geometry, cusec)
# Ownership
mapa <- propiedad |>
select(cusec, PropiedadDifMun)
mapa <- munic |>
left_join(mapa, by = "cusec")
mapa <- mapa |> filter(!is.na(PropiedadDifMun))
pa <- ggplot(esp_can) +
geom_sf(color = "black", size = 0.7) +
geom_sf(data = can_prov) +
geom_sf(data = can_box) +
geom_sf(data = mapa, aes(fill = PropiedadDifMun), color = "black", size = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf, alpha = 0.2, fill = "white") +
theme_void() +
labs(title = "Difference of % of Young People in 'Property' Residence Status in 2011 to 2021") +
theme(
legend.position = "none",
plot.title = element_text(family = "Calibri", size = 14),
text = element_text(family = "Calibri")
) +
coord_sf() +
scale_fill_gradient2(low = "red", mid = "white", high = "green", midpoint = 0)
# Rent
mapa <- propiedad |>
select(cusec, AlquilerDifMun)
mapa <- munic |>
left_join(mapa, by = "cusec")
mapa <- mapa |> filter(!is.na(AlquilerDifMun))
pb <- ggplot(esp_can) +
geom_sf(color = "black", size = 0.7) +
geom_sf(data = can_prov) +
geom_sf(data = can_box) +
geom_sf(data = mapa, aes(fill = AlquilerDifMun), color = "black", size = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf, alpha = 0.2, fill = "white") +
theme_void() +
labs(title = "Difference of % of Young People in 'Rent' Residence Status in 2011 to 2021") +
theme(
legend.position = "none",
plot.title = element_text(family = "Calibri", size = 14),
text = element_text(family = "Calibri")
) +
coord_sf() +
scale_fill_gradient2(low = "red", mid = "white", high = "green", midpoint = 0)
ggarrange(pa, pb, ncol = 2, nrow = 1)
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
library(mapSpain)
esp_can <- esp_get_country()
can_prov <- esp_get_can_provinces()
can_box <- esp_get_can_box()
munic <- esp_get_munic()
provic <- esp_get_prov() |>
mutate(CPRO = cpro)
# Propiedad
mapa <- propiedad |>
select(CPRO, PropiedadDifProv) |>
distinct(CPRO, .keep_all = TRUE)
mapa <- provic |>
left_join(mapa, by = "CPRO")
p8 <- ggplot(esp_can) +
geom_sf(color = "black", size = 0.7) +
geom_sf(data = can_prov) +
geom_sf(data = can_box) +
geom_sf(data = mapa, aes(fill = PropiedadDifProv), color = "black", size = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf, alpha = 0.2, fill = "white") +
geom_sf_text(data = mapa, aes(label = round(PropiedadDifProv, 1)),
color = "black", size = 2.4, check_overlap = TRUE, nudge_y = -0.03) +
theme_void() +
labs(title = "Difference of % of Young People in 'Property' Residence Status in 2011 to 2021") +
theme(
legend.position = "none",
plot.title = element_text(family = "Calibri", size = 14),
text = element_text(family = "Calibri")
) +
coord_sf() +
scale_fill_gradient2(low = "red", mid = "white", high = "green", midpoint = 0)
# Rent
mapa <- propiedad |>
select(CPRO, AlquilerDifProv) |>
distinct(CPRO, .keep_all = TRUE)
mapa <- provic |>
left_join(mapa, by = "CPRO")
p9<- ggplot(esp_can) +
geom_sf(color = "black", size = 0.7) +
geom_sf(data = can_prov) +
geom_sf(data = can_box) +
geom_sf(data = mapa, aes(fill = AlquilerDifProv), color = "black", size = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf, alpha = 0.2, fill = "white") +
geom_sf_text(data = mapa, aes(label = round(AlquilerDifProv, 1)), color = "black", size = 2.4, check_overlap = TRUE, nudge_y = -0.03, family = "Montserrat") +
theme_void() +
labs(title = "Difference of % of Young People in 'Rent' Residence Status in 2011 to 2021") +
theme(
legend.position = "none",
plot.title = element_text(family = "Calibri", size = 14),
text = element_text(family = "Calibri")) +
coord_sf() +
scale_fill_gradient2(low = "red", mid = "white", high = "green", midpoint = 0)
# Other
mapa <- propiedad |>
select(CPRO, OtroDifProv) |>
distinct(CPRO, .keep_all = TRUE)
mapa <- provic |>
left_join(mapa, by = "CPRO")
p10<- ggplot(esp_can) +
geom_sf(color = "black", size = 0.7) +
geom_sf(data = can_prov) +
geom_sf(data = can_box) +
geom_sf(data = mapa, aes(fill = OtroDifProv), color = "black", size = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf, alpha = 0.2, fill = "white") +
geom_sf_text(data = mapa, aes(label = round(OtroDifProv, 1)), color = "black", size = 2.4, check_overlap = TRUE, nudge_y = -0.03, family = "Montserrat") +
theme_void() +
labs(title = "Difference of % of Young People in 'Rent' Residence Status in 2011 to 2021") +
theme(
legend.position = "none",
plot.title = element_text(family = "Calibri", size = 14),
text = element_text(family = "Calibri")) +
coord_sf() +
scale_fill_gradient2(low = "red", mid = "white", high = "green", midpoint = 0)
# Save as image together
p8 <- p8 + theme(plot.margin = margin(10, 0, 0, -30))
p9 <- p9 + theme(plot.margin = margin(0, 0, 0, -30))
# Add individual titles
p8 <- p8 + labs(title = "Ownership") +
theme(plot.title = element_text(hjust = 0.6, family = "Times New Roman", size = 14))
p9 <- p9 + labs(title = "Rent", family = "Times New roman") +
theme(plot.title = element_text(hjust = 0.6, family = "Times New Roman", size = 14))
# Add title
combined_plot <- ggarrange(p8, p9, ncol = 1, nrow = 2, common.legend = FALSE) +
ggtitle("Difference of % of Young people per Tenure Regime in 2011 to 2021") +
theme(plot.title = element_text(hjust = 0.5, size = 14, family = "Times New Roman", face = "bold")) # Centramos el título general
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
combined_plot <- annotate_figure(combined_plot, bottom = text_grob("Source: Self-elaborated from Census data", hjust = 0.5, x = 0.5, size = 12, color = "#555555", family = "Times New Roman"))
# Save as image
#ggsave("difpropiedadalquiler.png", plot = combined_plot, width = 12, height = 8, bg = "white")
combined_plot
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
# - # Multilevel model
# Check we have the data
municipios <- read_xlsx("municipios.xlsx")
numeric_data <- municipios %>%
ungroup() |>
select_if(is.numeric)
# Scale
numeric_data <- scale(numeric_data)
numeric_data <- as.data.frame(numeric_data)
#Add CPRO and CCAA columns
numeric_data <- numeric_data %>%
mutate(id = row_number())
cpros <- municipios |>
select(cusec, CPRO, CCAA) |>
mutate(id = row_number())
numeric_data <- numeric_data |>
left_join(cpros, by = "id") |>
select(-id)
# Select variables
independientes <- numeric_data |>
select(CCAA, CPRO, cusec, PropiedadDifMun, ipvGeneralDif, PorcentajeJovenDif, VaciasDif, SecundariasDif, PorcentajeEspañolaDif, PorcentajeEuropeaDif, PorcentajeNoEuropeaDif, DifPrecioHipotecaProv, ParoJuvenilDif, ProtegidaDif, RentaMedia2021, PorcentajeTurísticas2021, PrecioAlquilerM2, Herencia11Mun) |>
# Feature engineering
mutate(PrecioAlquilerM22=PrecioAlquilerM2^2,
RentaMedia2021_2 = RentaMedia2021^2,
PorcentajeTurísticas2021_2 = PorcentajeTurísticas2021^2,
DifPrecioHipotecaProv2 = DifPrecioHipotecaProv^2,
Herencia11Mun2 = (Herencia11Mun)^2,
VaciasDif2 = VaciasDif^2,
RentaMedia2021_exp = exp(RentaMedia2021),
PorcentajeEspañolaDif2 = PorcentajeEspañolaDif^2,
PorcentajeEuropeaDif2 = PorcentajeEuropeaDif^2,
PorcentajeNoEuropeaDif2 = PorcentajeNoEuropeaDif^2,
SecundariasDif2 = (SecundariasDif)^2)
variables <- independientes |>
select(CCAA, CPRO, PropiedadDifMun, Herencia11Mun, PorcentajeJovenDif, VaciasDif, SecundariasDif, PorcentajeNoEuropeaDif, DifPrecioHipotecaProv, ParoJuvenilDif, ProtegidaDif, PorcentajeTurísticas2021, PrecioAlquilerM2, RentaMedia2021_2, PrecioAlquilerM22, DifPrecioHipotecaProv2, Herencia11Mun2, PorcentajeEspañolaDif2, PorcentajeEuropeaDif2, RentaMedia2021, PorcentajeTurísticas2021_2)
The model itself
modelo_multi <- lmer(PropiedadDifMun ~ PorcentajeJovenDif + PorcentajeEspañolaDif2 + PorcentajeEspañolaDif2 + PorcentajeNoEuropeaDif + RentaMedia2021 + VaciasDif + PorcentajeTurísticas2021 + PorcentajeTurísticas2021_2 + PrecioAlquilerM2 + + PrecioAlquilerM22 + Herencia11Mun + Herencia11Mun2 + (1|CPRO) + (1|CCAA), data = variables)
summary(modelo_multi)
## Linear mixed model fit by REML ['lmerMod']
## Formula: PropiedadDifMun ~ PorcentajeJovenDif + PorcentajeEspañolaDif2 +
## PorcentajeEspañolaDif2 + PorcentajeNoEuropeaDif + RentaMedia2021 +
## VaciasDif + PorcentajeTurísticas2021 + PorcentajeTurísticas2021_2 +
## PrecioAlquilerM2 + +PrecioAlquilerM22 + Herencia11Mun + Herencia11Mun2 +
## (1 | CPRO) + (1 | CCAA)
## Data: variables
##
## REML criterion at convergence: 1408
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.3090 -0.6050 -0.0046 0.6125 4.1847
##
## Random effects:
## Groups Name Variance Std.Dev.
## CPRO (Intercept) 0.11625 0.3410
## CCAA (Intercept) 0.04024 0.2006
## Residual 0.53533 0.7317
## Number of obs: 584, groups: CPRO, 52; CCAA, 19
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -0.125118 0.090270 -1.386
## PorcentajeJovenDif 0.154497 0.039257 3.936
## PorcentajeEspañolaDif2 0.098858 0.019585 5.048
## PorcentajeNoEuropeaDif -0.287264 0.054263 -5.294
## RentaMedia2021 0.117529 0.048264 2.435
## VaciasDif 0.133401 0.043981 3.033
## PorcentajeTurísticas2021 0.233317 0.070700 3.300
## PorcentajeTurísticas2021_2 -0.032171 0.010165 -3.165
## PrecioAlquilerM2 -0.326464 0.071356 -4.575
## PrecioAlquilerM22 0.167857 0.029070 5.774
## Herencia11Mun 0.126894 0.063379 2.002
## Herencia11Mun2 -0.054791 0.008316 -6.589
## Warning in abbreviate(rn, minlength = 11): abreviatura utilizada con caracteres
## no ASCII
##
## Correlation of Fixed Effects:
## Warning in abbreviate(rn, minlength = 6): abreviatura utilizada con caracteres
## no ASCII
## Warning in abbreviate(rn, minlength = 6): abreviatura utilizada con caracteres
## no ASCII
## (Intr) PrcnJD PrEñD2 PrcNED RM2021 VacsDf PrT2021 PT2021_ PrcAM2
## PrcntjJvnDf -0.067
## PrcntjEsñD2 -0.191 0.120
## PrcntjNErpD 0.143 -0.367 -0.613
## RentaMd2021 0.054 -0.115 -0.024 0.211
## VaciasDif 0.049 -0.192 -0.023 0.215 0.188
## PrcntjT2021 0.053 0.033 -0.064 -0.019 -0.002 -0.048
## PrcnT2021_2 -0.082 -0.062 -0.055 0.106 0.036 0.003 -0.820
## PrecAlqlrM2 0.236 -0.066 0.039 -0.056 -0.316 0.111 -0.257 0.110
## PrcAlqlrM22 -0.350 -0.029 -0.051 0.041 -0.043 -0.180 0.125 -0.041 -0.627
## Herenci11Mn 0.151 -0.180 0.010 0.067 0.084 -0.244 -0.273 0.166 0.339
## Herenc11Mn2 -0.138 0.169 0.007 -0.099 -0.088 0.046 0.179 -0.082 -0.184
## PrAM22 Hrn11M
## PrcntjJvnDf
## PrcntjEsñD2
## PrcntjNErpD
## RentaMd2021
## VaciasDif
## PrcntjT2021
## PrcnT2021_2
## PrecAlqlrM2
## PrcAlqlrM22
## Herenci11Mn -0.240
## Herenc11Mn2 0.128 -0.769
# Test multicolinearity
vif(modelo_multi)
## PorcentajeJovenDif PorcentajeEspañolaDif2
## 1.277609 1.725807
## PorcentajeNoEuropeaDif RentaMedia2021
## 2.053334 1.478132
## VaciasDif PorcentajeTurísticas2021
## 1.517233 3.657516
## PorcentajeTurísticas2021_2 PrecioAlquilerM2
## 3.356258 2.551298
## PrecioAlquilerM22 Herencia11Mun
## 1.929337 3.661844
## Herencia11Mun2
## 2.719771
# Get table
tab_model(modelo_multi, show.ci = F, show.se = T, pred.labels = c("(Intercept)", "Young Population Diff", "Spanish Population Diff ^2", "European Population Diff ^2", "Non-European Population Diff", "Average Income 2021", "Empty Dwellings Diff", "Tourist Dwellings 2021", "Tourist Dwellings 2021", "M2 Rental Price 2021", "M2 Rental Price 2021 ^2", "Inheritance 2011", "Inheritance 2011 ^2"))
## Length of `pred.labels` does not equal number of predictors, no labelling applied.
| Propiedad Dif Mun | |||
|---|---|---|---|
| Predictors | Estimates | std. Error | p |
| (Intercept) | -0.13 | 0.09 | 0.166 |
| PorcentajeJovenDif | 0.15 | 0.04 | <0.001 |
| PorcentajeEspañolaDif2 | 0.10 | 0.02 | <0.001 |
| PorcentajeNoEuropeaDif | -0.29 | 0.05 | <0.001 |
| RentaMedia2021 | 0.12 | 0.05 | 0.015 |
| VaciasDif | 0.13 | 0.04 | 0.003 |
| PorcentajeTurísticas2021 | 0.23 | 0.07 | 0.001 |
| PorcentajeTurísticas2021_2 | -0.03 | 0.01 | 0.002 |
| PrecioAlquilerM2 | -0.33 | 0.07 | <0.001 |
| PrecioAlquilerM22 | 0.17 | 0.03 | <0.001 |
| Herencia11Mun | 0.13 | 0.06 | 0.046 |
| Herencia11Mun2 | -0.05 | 0.01 | <0.001 |
| Random Effects | |||
| σ2 | 0.54 | ||
| τ00 CPRO | 0.12 | ||
| τ00 CCAA | 0.04 | ||
| ICC | 0.23 | ||
| N CPRO | 52 | ||
| N CCAA | 19 | ||
| Observations | 584 | ||
| Marginal R2 / Conditional R2 | 0.268 / 0.433 | ||
# Table of random effects intercepts
ranef(modelo_multi)
## $CPRO
## (Intercept)
## 01 -0.3394201888
## 02 -0.0539252098
## 03 -0.1102050355
## 04 0.0954522909
## 05 -0.0030815013
## 06 -0.2778106851
## 07 0.0571203794
## 08 -0.4568979505
## 09 0.1673043619
## 10 0.2786687013
## 11 0.0493371339
## 12 0.2945304340
## 13 -0.2341137245
## 14 -0.5279151647
## 15 -0.3523170103
## 16 0.2185750495
## 17 0.0978208873
## 18 0.1330674041
## 19 -0.0436789962
## 20 0.1694214666
## 21 -0.0239980750
## 22 0.1978326981
## 23 0.3980492004
## 24 -0.3650156094
## 25 -0.1456538803
## 26 0.3115981280
## 27 0.3179673614
## 28 -0.0600658908
## 29 0.6199853744
## 30 0.3768622041
## 31 -0.0761234298
## 32 0.1305461700
## 33 0.0258548810
## 34 -0.1110030877
## 35 -0.3423124471
## 36 -0.0178135566
## 37 0.2895817566
## 38 -0.2331998622
## 39 -0.2960146211
## 40 0.1918394903
## 41 0.0721591718
## 42 -0.0523262097
## 43 0.0731430582
## 44 0.1325878437
## 45 0.1361283070
## 46 -0.5842958011
## 47 -0.1406036110
## 48 -0.0973106305
## 49 0.0001320626
## 50 -0.2655381377
## 51 0.2133976247
## 52 0.1616768755
##
## $CCAA
## (Intercept)
## Andalucía 0.2824937152
## Aragón 0.0224580724
## Asturias 0.0089492798
## Canarias -0.1992049662
## Cantabria -0.1024610276
## Castilla-La Mancha 0.0079560609
## Castilla y León -0.0080207611
## Cataluña -0.1493876825
## Ceuta 0.0738643917
## Comunidad de Madrid -0.0207909085
## Comunidad de Murcia 0.1304452076
## Comunidad Foral de Navarra -0.0263489851
## Comunidad Valenciana -0.1384437644
## Extremadura 0.0002969894
## Galicia 0.0271310892
## Islas Baleares 0.0197713638
## La Rioja 0.1078550251
## Melilla 0.0559620290
## País Vasco -0.0925251288
##
## with conditional variances for "CPRO" "CCAA"
random_effects <- ranef(modelo_multi)
random_effects_df <- as.data.frame(random_effects)
# Add labels
locations <- read_excel("vivienda turística/exp_viv_turistica_tabla5_FEB2021.xlsx", sheet = 3, na = "Dato protegido por secreto estadístico.")
nombres_provincias <- locations |>
select(PROV_LITERAL, PROV) |>
rename(CPRO = PROV) |>
distinct(CPRO, .keep_all = TRUE)
nombres <- nombres_provincias$PROV_LITERAL
rownames(random_effects_df)[1:52] <- nombres[1:52]
#ccaa
ccaa <- unique(municipios$CCAA)
ccaa <- as.data.frame(ccaa)
ccaa <- ccaa %>%
mutate(ccaa = if_else(ccaa %in% nombres, paste0(ccaa, " (CCAA)"), ccaa))
# Final RF table
tab_df(random_effects_df,
title = "Random Effects Estimates",
show.rownames = TRUE)
| Row | grpvar | term | grp | condval | condsd |
|---|---|---|---|---|---|
| Araba/Álava | CPRO | (Intercept) | 01 | -0.34 | 0.26 |
| Albacete | CPRO | (Intercept) | 02 | -0.05 | 0.22 |
| Alicante/Alacant | CPRO | (Intercept) | 03 | -0.11 | 0.18 |
| Almería | CPRO | (Intercept) | 04 | 0.10 | 0.21 |
| Ávila | CPRO | (Intercept) | 05 | -0.00 | 0.26 |
| Badajoz | CPRO | (Intercept) | 06 | -0.28 | 0.23 |
| Balears, Illes | CPRO | (Intercept) | 07 | 0.06 | 0.21 |
| Barcelona | CPRO | (Intercept) | 08 | -0.46 | 0.16 |
| Burgos | CPRO | (Intercept) | 09 | 0.17 | 0.24 |
| Cáceres | CPRO | (Intercept) | 10 | 0.28 | 0.24 |
| Cádiz | CPRO | (Intercept) | 11 | 0.05 | 0.18 |
| Castellón/Castelló | CPRO | (Intercept) | 12 | 0.29 | 0.21 |
| Ciudad Real | CPRO | (Intercept) | 13 | -0.23 | 0.22 |
| Córdoba | CPRO | (Intercept) | 14 | -0.53 | 0.20 |
| Coruña, A | CPRO | (Intercept) | 15 | -0.35 | 0.20 |
| Cuenca | CPRO | (Intercept) | 16 | 0.22 | 0.25 |
| Girona | CPRO | (Intercept) | 17 | 0.10 | 0.21 |
| Granada | CPRO | (Intercept) | 18 | 0.13 | 0.20 |
| Guadalajara | CPRO | (Intercept) | 19 | -0.04 | 0.24 |
| Gipuzkoa | CPRO | (Intercept) | 20 | 0.17 | 0.22 |
| Huelva | CPRO | (Intercept) | 21 | -0.02 | 0.21 |
| Huesca | CPRO | (Intercept) | 22 | 0.20 | 0.25 |
| Jaén | CPRO | (Intercept) | 23 | 0.40 | 0.21 |
| León | CPRO | (Intercept) | 24 | -0.37 | 0.23 |
| Lleida | CPRO | (Intercept) | 25 | -0.15 | 0.25 |
| Rioja, La | CPRO | (Intercept) | 26 | 0.31 | 0.25 |
| Lugo | CPRO | (Intercept) | 27 | 0.32 | 0.25 |
| Madrid | CPRO | (Intercept) | 28 | -0.06 | 0.19 |
| Málaga | CPRO | (Intercept) | 29 | 0.62 | 0.17 |
| Murcia | CPRO | (Intercept) | 30 | 0.38 | 0.20 |
| Navarra | CPRO | (Intercept) | 31 | -0.08 | 0.25 |
| Ourense | CPRO | (Intercept) | 32 | 0.13 | 0.25 |
| Asturias | CPRO | (Intercept) | 33 | 0.03 | 0.22 |
| Palencia | CPRO | (Intercept) | 34 | -0.11 | 0.26 |
| Palmas, Las | CPRO | (Intercept) | 35 | -0.34 | 0.20 |
| Pontevedra | CPRO | (Intercept) | 36 | -0.02 | 0.21 |
| Salamanca | CPRO | (Intercept) | 37 | 0.29 | 0.24 |
| Santa Cruz de Tenerife | CPRO | (Intercept) | 38 | -0.23 | 0.20 |
| Cantabria | CPRO | (Intercept) | 39 | -0.30 | 0.23 |
| Segovia | CPRO | (Intercept) | 40 | 0.19 | 0.26 |
| Sevilla | CPRO | (Intercept) | 41 | 0.07 | 0.17 |
| Soria | CPRO | (Intercept) | 42 | -0.05 | 0.26 |
| Tarragona | CPRO | (Intercept) | 43 | 0.07 | 0.20 |
| Teruel | CPRO | (Intercept) | 44 | 0.13 | 0.25 |
| Toledo | CPRO | (Intercept) | 45 | 0.14 | 0.23 |
| Valencia/València | CPRO | (Intercept) | 46 | -0.58 | 0.17 |
| Valladolid | CPRO | (Intercept) | 47 | -0.14 | 0.24 |
| Bizkaia | CPRO | (Intercept) | 48 | -0.10 | 0.20 |
| Zamora | CPRO | (Intercept) | 49 | 0.00 | 0.24 |
| Zaragoza | CPRO | (Intercept) | 50 | -0.27 | 0.25 |
| Ceuta | CPRO | (Intercept) | 51 | 0.21 | 0.31 |
| Melilla | CPRO | (Intercept) | 52 | 0.16 | 0.31 |
| 53 | CCAA | (Intercept) | Andalucía | 0.28 | 0.11 |
| 54 | CCAA | (Intercept) | Aragón | 0.02 | 0.16 |
| 55 | CCAA | (Intercept) | Asturias | 0.01 | 0.18 |
| 56 | CCAA | (Intercept) | Canarias | -0.20 | 0.16 |
| 57 | CCAA | (Intercept) | Cantabria | -0.10 | 0.18 |
| 58 | CCAA | (Intercept) | Castilla-La Mancha | 0.01 | 0.14 |
| 59 | CCAA | (Intercept) | Castilla y León | -0.01 | 0.12 |
| 60 | CCAA | (Intercept) | Cataluña | -0.15 | 0.14 |
| 61 | CCAA | (Intercept) | Ceuta | 0.07 | 0.19 |
| 62 | CCAA | (Intercept) | Comunidad de Madrid | -0.02 | 0.18 |
| 63 | CCAA | (Intercept) | Comunidad de Murcia | 0.13 | 0.18 |
| 64 | CCAA | (Intercept) | Comunidad Foral de Navarra | -0.03 | 0.18 |
| 65 | CCAA | (Intercept) | Comunidad Valenciana | -0.14 | 0.15 |
| 66 | CCAA | (Intercept) | Extremadura | 0.00 | 0.17 |
| 67 | CCAA | (Intercept) | Galicia | 0.03 | 0.15 |
| 68 | CCAA | (Intercept) | Islas Baleares | 0.02 | 0.18 |
| 69 | CCAA | (Intercept) | La Rioja | 0.11 | 0.18 |
| 70 | CCAA | (Intercept) | Melilla | 0.06 | 0.19 |
| 71 | CCAA | (Intercept) | País Vasco | -0.09 | 0.16 |
Heterocedasticity was tested on a linear model just like the final multilevel model but without the random effects. There was a little heterocedasticity but residuals didn’t distribute in clear patterns.
Regarding normality, the Shapiro-Wilk test gives a value less than 0.05, which tells us that the data do not follow a normal distribution.
However, in the QQ plot, we see that the data mostly fit the line perfectly, but at the beginning and the end, they deviate symmetrically: at the beginning, the values that separate from the line go down, and at the end, they go up. It appears to follow a normal or ‘light-tailed’ distribution. This is related to having fewer extreme values than a normal distribution.
#qq plot
residuos <- resid(modelo_multi)
qqnorm(residuos)
qqline(residuos)
shapiro.test(residuos)
##
## Shapiro-Wilk normality test
##
## data: residuos
## W = 0.99321, p-value = 0.009694
independientes <- independientes %>%
select_if(is.numeric)
# Get names of independent variables
variables_independientes <- setdiff(names(independientes), "PropiedadDifMun")
# Function to get plots & trasnform variables
generar_grafico <- function(variable) {
# Crear transformaciones
independientes[[paste0(variable, "2")]] <- independientes[[variable]]^2
independientes[[paste0(variable, "_sqrt")]] <- sqrt(independientes[[variable]])
independientes[[paste0(variable, "_log")]] <- log(independientes[[variable]] + 1)
# Lineal models
modelo_original <- lm(PropiedadDifMun ~ ., data = independientes[, c("PropiedadDifMun", variable)])
modelo_cuadrado <- lm(PropiedadDifMun ~ ., data = independientes[, c("PropiedadDifMun", variable, paste0(variable, "2"))])
modelo_sqrt <- lm(PropiedadDifMun ~ ., data = independientes[, c("PropiedadDifMun", variable, paste0(variable, "_sqrt"))])
modelo_log <- lm(PropiedadDifMun ~ ., data = independientes[, c("PropiedadDifMun", variable, paste0(variable, "_log"))])
# Create value ranges
rango_variable <- seq(min(independientes[[variable]], na.rm = TRUE), max(independientes[[variable]], na.rm = TRUE), length.out = 100)
newdata <- data.frame(variable = rango_variable)
colnames(newdata) <- variable
newdata[[paste0(variable, "2")]] <- rango_variable^2
newdata[[paste0(variable, "_sqrt")]] <- sqrt(rango_variable)
newdata[[paste0(variable, "_log")]] <- log(rango_variable + 1)
# Predictions
pred_original <- predict(modelo_original, newdata = newdata)
pred_cuadrado <- predict(modelo_cuadrado, newdata = newdata)
pred_sqrt <- predict(modelo_sqrt, newdata = newdata)
pred_log <- predict(modelo_log, newdata = newdata)
# Plot
plot(independientes[[variable]], independientes$PropiedadDifMun,
xlab = variable, ylab = "PropiedadDifMun", main = paste("Relación entre", variable, "y PropiedadDifMun"),
col = "blue", pch = 16)
lines(rango_variable, pred_original, col = "red", lwd = 2)
lines(rango_variable, pred_cuadrado, col = "green", lwd = 2, lty = 2)
lines(rango_variable, pred_sqrt, col = "purple", lwd = 2, lty = 3)
lines(rango_variable, pred_log, col = "orange", lwd = 2, lty = 4)
legend("topright", legend = c("Datos", "Original", "Cuadrado", "Raíz Cuadrada", "Logaritmo"),
col = c("blue", "red", "green", "purple", "orange"), pch = c(16, NA, NA, NA, NA),
lwd = c(NA, 2, 2, 2, 2), lty = c(NA, 1, 2, 3, 4))
}
# Create variables per each independent variable
for (variable in variables_independientes) {
generar_grafico(variable)
}
## Warning in sqrt(independientes[[variable]]): Se han producido NaNs
## Warning in log(independientes[[variable]] + 1): Se han producido NaNs
## Warning in sqrt(rango_variable): Se han producido NaNs
## Warning in log(rango_variable + 1): Se han producido NaNs
## Warning in sqrt(independientes[[variable]]): Se han producido NaNs
## Warning in log(independientes[[variable]] + 1): Se han producido NaNs
## Warning in sqrt(rango_variable): Se han producido NaNs
## Warning in log(rango_variable + 1): Se han producido NaNs
## Warning in sqrt(independientes[[variable]]): Se han producido NaNs
## Warning in log(independientes[[variable]] + 1): Se han producido NaNs
## Warning in sqrt(rango_variable): Se han producido NaNs
## Warning in log(rango_variable + 1): Se han producido NaNs
## Warning in sqrt(independientes[[variable]]): Se han producido NaNs
## Warning in log(independientes[[variable]] + 1): Se han producido NaNs
## Warning in sqrt(rango_variable): Se han producido NaNs
## Warning in log(rango_variable + 1): Se han producido NaNs
## Warning in sqrt(independientes[[variable]]): Se han producido NaNs
## Warning in log(independientes[[variable]] + 1): Se han producido NaNs
## Warning in sqrt(rango_variable): Se han producido NaNs
## Warning in log(rango_variable + 1): Se han producido NaNs
## Warning in sqrt(independientes[[variable]]): Se han producido NaNs
## Warning in log(independientes[[variable]] + 1): Se han producido NaNs
## Warning in sqrt(rango_variable): Se han producido NaNs
## Warning in log(rango_variable + 1): Se han producido NaNs
## Warning in sqrt(independientes[[variable]]): Se han producido NaNs
## Warning in log(independientes[[variable]] + 1): Se han producido NaNs
## Warning in sqrt(rango_variable): Se han producido NaNs
## Warning in log(rango_variable + 1): Se han producido NaNs
## Warning in sqrt(independientes[[variable]]): Se han producido NaNs
## Warning in log(independientes[[variable]] + 1): Se han producido NaNs
## Warning in sqrt(rango_variable): Se han producido NaNs
## Warning in log(rango_variable + 1): Se han producido NaNs
## Warning in sqrt(independientes[[variable]]): Se han producido NaNs
## Warning in log(independientes[[variable]] + 1): Se han producido NaNs
## Warning in sqrt(rango_variable): Se han producido NaNs
## Warning in log(rango_variable + 1): Se han producido NaNs
## Warning in sqrt(independientes[[variable]]): Se han producido NaNs
## Warning in log(independientes[[variable]] + 1): Se han producido NaNs
## Warning in sqrt(rango_variable): Se han producido NaNs
## Warning in log(rango_variable + 1): Se han producido NaNs
## Warning in sqrt(independientes[[variable]]): Se han producido NaNs
## Warning in log(independientes[[variable]] + 1): Se han producido NaNs
## Warning in sqrt(rango_variable): Se han producido NaNs
## Warning in log(rango_variable + 1): Se han producido NaNs
## Warning in sqrt(independientes[[variable]]): Se han producido NaNs
## Warning in sqrt(rango_variable): Se han producido NaNs
## Warning in sqrt(independientes[[variable]]): Se han producido NaNs
## Warning in log(independientes[[variable]] + 1): Se han producido NaNs
## Warning in sqrt(rango_variable): Se han producido NaNs
## Warning in log(rango_variable + 1): Se han producido NaNs
## Warning in sqrt(independientes[[variable]]): Se han producido NaNs
## Warning in sqrt(rango_variable): Se han producido NaNs
variables_independientes <- c("ipv2021General", "PorcentajeJovenDif", "VaciasDif", "SecundariasDif", "DifPrecioHipotecaProv", "ParoJuvenilDif", "ProtegidaDif", "RentaMedia2021", "PorcentajeTurísticas2021", "PrecioAlquilerM2", "Herencia11Mun")
par(mfrow = c(4, 3))
independientes <- independientes %>%
select_if(is.numeric)
# Create histograms per each variable
for (variable in variables_independientes) {
if (is.numeric(independientes[[variable]])) {
hist(independientes[[variable]], main = paste("Histograma de", variable),
xlab = variable, col = "skyblue", border = "white")
} else {
warning(paste("Variable", variable, "is not numeric and will be skipped."))
}
}
## Warning: Variable ipv2021General is not numeric and will be skipped.
I keep all variables included in the multilevel model and the variables of the Difference in all tenure regimes.
municipios <- read_xlsx("municipios.xlsx")
municipios <- municipios |>
mutate(MUN_LITERAL = ifelse(grepl("\\d", MUN_LITERAL), paste(MUN_LITERAL, PROV_LITERAL), MUN_LITERAL))
# Keep province name in the rural municipalities names
names = municipios$MUN_LITERAL
prov = municipios$PROV_LITERAL
ccaa = municipios$CCAA
numeric_data <- municipios %>%
ungroup() |>
select_if(is.numeric)
numeric_data <- scale(numeric_data)
numeric_data <- as.data.frame(numeric_data)
# Select variables from multilevel model
selected_vars <- numeric_data %>%
select(PropiedadDifMun, AlquilerDifMun, OtroDifMun, Herencia11Mun, PorcentajeJovenDif, VaciasDif, SecundariasDif, PorcentajeNoEuropeaDif, DifPrecioHipotecaProv, ParoJuvenilDif, ProtegidaDif, PorcentajeTurísticas2021, PrecioAlquilerM2, RentaMedia2021)
# Creating the components
set.seed(123)
pca = prcomp(selected_vars, scale=T)
summary(pca)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 1.7991 1.4413 1.3021 1.2367 1.12508 0.96951 0.89003
## Proportion of Variance 0.2312 0.1484 0.1211 0.1092 0.09041 0.06714 0.05658
## Cumulative Proportion 0.2312 0.3796 0.5007 0.6099 0.70035 0.76748 0.82407
## PC8 PC9 PC10 PC11 PC12 PC13
## Standard deviation 0.77704 0.73491 0.63617 0.61180 0.58372 0.44658
## Proportion of Variance 0.04313 0.03858 0.02891 0.02674 0.02434 0.01425
## Cumulative Proportion 0.86720 0.90577 0.93468 0.96142 0.98575 1.00000
## PC14
## Standard deviation 0.0004835
## Proportion of Variance 0.0000000
## Cumulative Proportion 1.0000000
# Plot of the variability explained by components
fviz_screeplot(pca, addlabels = TRUE)
# Contribution of variables to component 1
fviz_contrib(pca, choice = "var", axes = 1)
# Plot adjustments
n <- 10
col_pos <- colorRampPalette(c("#CCE6CC", "#208909"))(n)
col_neg <- colorRampPalette(c("#FFE6E6", "#B30000"))(n)
coefs <- pca$rotation[,1]
valor_max <- max(coefs)
valor_min <- min(coefs)
coefs_ajustados <- coefs - valor_min + 1
colores <- ifelse(coefs >= 0, col_pos[ceiling((coefs_ajustados / (valor_max - valor_min + 1)) * n)],
col_neg[ceiling((abs(coefs_ajustados) / (valor_max - valor_min + 1)) * n)])
# Plot
barplot(coefs, las = 2, col = colores, cex.names = 0.8)
#Ranking of municipalities of this component
names[order(pca$x[,1], decreasing=T)][1:5]
## [1] "<=2000 Lugo" "<=2000 Ourense"
## [3] "2001 <= 5000 Lugo" "2001 <= 5000 Palmas, Las"
## [5] "<=2000 Ciudad Real"
names[order(pca$x[,1])][1:5]
## [1] "Alcobendas" "Pozuelo de Alarcón"
## [3] "Boadilla del Monte" "San Sebastián de los Reyes"
## [5] "Rozas de Madrid, Las"
# Contribution of variables to component 1
fviz_contrib(pca, choice = "var", axes = 2)
# Plot adjustments
n <- 10
col_pos <- colorRampPalette(c("#CCE6CC", "#208909"))(n)
col_neg <- colorRampPalette(c("#FFE6E6", "#B30000"))(n)
coefs <- pca$rotation[,2]
valor_max <- max(coefs)
valor_min <- min(coefs)
coefs_ajustados <- coefs - valor_min + 1
colores <- ifelse(coefs >= 0, col_pos[ceiling((coefs_ajustados / (valor_max - valor_min + 1)) * n)],
col_neg[ceiling((abs(coefs_ajustados) / (valor_max - valor_min + 1)) * n)])
# Plot
barplot(coefs, las = 2, col = colores, cex.names = 0.8)
#Ranking of municipalities of this component
names[order(pca$x[,2], decreasing=T)][1:5]
## [1] "Ceuta" "Oliva, La"
## [3] "Sant Josep de sa Talaia" "Mogán"
## [5] "Adeje"
names[order(pca$x[,2])][1:5]
## [1] "Torre-Pacheco" "Mazarrón" "Águilas"
## [4] "Lorca" "2001 <= 5000 Murcia"
p <-data.frame(z1 = pca$x[,1], z2 = pca$x[,2]) %>%
ggplot(aes(z1, z2, label = names, color = ccaa)) +
geom_point(size = 0) +
labs(title = "First two principal components (scores)", x = "PC1", y = "PC2") + theme_bw() + theme(legend.position = "bottom") +
geom_text(size = 3, hjust = 0.6, vjust = 0, check_overlap = T)
p
# Versión interactiva
library(plotly)
##
## Attaching package: 'plotly'
## The following objects are masked from 'package:flextable':
##
## highlight, style
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
ggplotly(p)
How many clusters? Two methods suggest 2, the third one 5.
fviz_nbclust(scale(selected_vars), kmeans, method = 'wss', k.max = 20, nstart = 1000)
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
fviz_nbclust(scale(selected_vars), kmeans, method = 'silhouette', k.max = 20, nstart = 1000)
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
fviz_nbclust(scale(selected_vars), kmeans, method = 'gap_stat', k.max = 10, nstart = 100, nboot = 100)
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
Cluster plot
set.seed(123)
fit <- kmeans(selected_vars, centers = 5, nstart = 100)
fit
## K-means clustering with 5 clusters of sizes 138, 258, 28, 37, 123
##
## Cluster means:
## PropiedadDifMun AlquilerDifMun OtroDifMun Herencia11Mun PorcentajeJovenDif
## 1 0.9685854 -0.8477602 -0.30567771 0.6610622 0.03787514
## 2 -0.2728040 0.1529839 0.22535334 -0.2840823 -0.27206479
## 3 -0.1165844 -0.4909389 0.99902870 1.6815336 0.08372657
## 4 -0.4613462 1.0749195 -0.94344746 -0.2531881 0.34197068
## 5 -0.3491640 0.4186611 -0.07335679 -0.4524255 0.40624948
## VaciasDif SecundariasDif PorcentajeNoEuropeaDif DifPrecioHipotecaProv
## 1 0.8928064 0.7570246 -0.47491431 -0.4932235
## 2 -0.4309271 0.2337835 -0.01691201 -0.3684490
## 3 1.2959531 -0.3799071 -0.59594063 0.8727930
## 4 -0.2021411 -0.7524592 0.14505481 -0.4309580
## 5 -0.3319965 -1.0268874 0.66033157 1.2571701
## ParoJuvenilDif ProtegidaDif PorcentajeTurísticas2021 PrecioAlquilerM2
## 1 0.1160849 0.17555398 -0.17474946 -0.7768667
## 2 -0.2308582 0.32614666 -0.18164308 -0.3481197
## 3 1.6124995 0.06949617 3.11462238 0.2697267
## 4 1.7196120 -2.65495182 -0.31535770 1.6145831
## 5 -0.5303571 -0.09825174 -0.03708822 1.0547199
## RentaMedia2021
## 1 -0.64673846
## 2 -0.03578979
## 3 -0.30194666
## 4 -0.61340852
## 5 1.05393737
##
## Clustering vector:
## [1] 4 4 4 4 1 2 2 1 2 2 1 1 1 2 2 2 2 2 2 2 5 3 5 2 2 2 2 2 2 2 2 1 2 2 2 2 2
## [38] 2 1 2 1 2 2 2 2 2 5 2 5 2 1 1 1 2 1 1 1 2 2 2 2 2 2 3 3 3 5 3 5 5 5 5 5 5
## [75] 5 5 5 3 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
## [112] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 2 2 1 1 2 2 1 1 1 2 1 2 1 1 5 1 2 1 1 2 3
## [149] 2 1 2 2 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 1 2 1 1 1 2 2 1 2 1 1 1 1 2 2
## [186] 2 2 2 1 2 1 2 1 2 1 2 1 2 2 2 2 2 2 1 2 1 1 2 1 2 2 2 5 5 2 5 5 2 5 2 5 5
## [223] 1 2 2 1 3 2 2 2 1 2 2 1 2 2 2 2 2 4 4 4 4 4 4 4 4 4 4 1 2 1 1 1 2 2 2 2 2
## [260] 1 1 2 2 2 1 1 1 1 1 1 2 2 1 1 1 2 2 1 2 2 2 2 2 2 2 5 1 1 2 2 1 2 1 1 1 2
## [297] 1 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
## [334] 5 1 1 1 5 1 1 2 5 2 5 1 5 5 5 5 3 5 1 2 5 1 2 2 1 2 1 2 2 2 1 2 2 2 2 2 2
## [371] 2 2 2 2 2 4 4 4 4 4 4 1 1 1 1 1 1 1 2 1 1 2 2 1 1 2 2 1 2 1 2 3 2 3 2 3 2
## [408] 2 2 3 3 2 2 3 2 3 3 2 1 1 1 1 3 1 1 1 2 2 1 1 1 1 2 2 1 2 3 3 3 3 3 3 4 3
## [445] 3 1 3 1 3 1 1 1 2 1 2 2 2 2 2 2 2 1 1 2 2 1 1 2 2 1 2 2 1 1 2 1 1 2 2 2 1
## [482] 2 2 2 2 2 1 2 2 2 1 2 2 2 2 5 2 2 5 2 2 2 2 5 1 1 2 2 2 2 1 1 2 2 2 2 1 2
## [519] 2 2 2 2 2 2 5 1 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 5 2 2 2 2
## [556] 2 2 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 1 2 2 1 2 1 2 1 2 2 4 5
##
## Within cluster sum of squares by cluster:
## [1] 968.2069 1740.3895 789.0999 337.2357 1429.2597
## (between_SS / total_SS = 35.5 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
selected_vars$cluster <- fit$cluster
centers=fit$centers
barplot(centers[1,], las=2, col="#097969")
barplot(centers[2,], las=2, col="#097969")
p <-fviz_cluster(fit, data = selected_vars, geom = c("point"), ellipse.type = 'norm', pointsize = 1, show.clust.cent = FALSE) +
theme_minimal() + geom_text(label = names, hjust = 0, vjust = 0, size = 2, check_overlap = TRUE) +
scale_fill_brewer(palette = "Paired", guide = FALSE)
p
## Warning: The `guide` argument in `scale_*()` cannot be `FALSE`. This was deprecated in
## ggplot2 3.3.4.
## ℹ Please use "none" instead.
## ℹ The deprecated feature was likely used in the ggpubr package.
## Please report the issue at <https://github.com/kassambara/ggpubr/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
ggsave(filename = "cluster_plot.png", plot = p, width = 8, height = 4, dpi = 300)
library(plotly)
ggplotly(p)
This model was tested although not used for the final work.
If I do not balance the classes, 96% of predictions is a decrease and 32% is an increase in property. The accuracy is 85%, which is logical because the classes are unbalanced. We see that the specificity is 96%, meaning it accurately predicts decreases, but the sensitivity is only 0.32, meaning it poorly detects increases.
If the classes are balanced with N=1000, the accuracy drops to 72%, specificity to 71%, but the sensitivity increases to 73%.
# Preparar datos
municipios <- read_xlsx("municipios.xlsx")
municipios <- municipios |>
mutate(MUN_LITERAL = ifelse(grepl("\\d", MUN_LITERAL), paste(MUN_LITERAL, PROV_LITERAL), MUN_LITERAL))
numeric_data <- municipios %>%
ungroup() |>
select_if(is.numeric)
numeric_data <- numeric_data %>%
mutate(Propiedad = ifelse(PropiedadDifMun >= 0, 1, 0)) %>%
select(Propiedad, RentaMedia2021, PorcentajeJoven2021, PorcentajeJovenDif, PorcentajeVacias2021, PorcentajeSecundarias2021, PorcentajeEspañola2021, PorcentajeProtegidas2021, ProtegidaDif, PrecioAlquilerM2)
# Equilibrar clases
prop.table(table(numeric_data$Propiedad))
##
## 0 1
## 0.90924658 0.09075342
library(ROSE)
## Loaded ROSE 0.0-4
numeric_data <- ovun.sample(Propiedad~.,
data = numeric_data,
method = "over",
N = 1000)$data
table(numeric_data$Propiedad)
##
## 0 1
## 531 469
# Modelo
modelo <- glm(Propiedad ~ ., data = numeric_data, family = binomial(link = "logit"))
summary(modelo)
##
## Call:
## glm(formula = Propiedad ~ ., family = binomial(link = "logit"),
## data = numeric_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -9.809e+00 1.945e+00 -5.043 4.59e-07 ***
## RentaMedia2021 1.293e-04 2.274e-05 5.687 1.29e-08 ***
## PorcentajeJoven2021 1.932e-02 2.166e-02 0.892 0.3725
## PorcentajeJovenDif 6.972e-02 3.514e-02 1.984 0.0472 *
## PorcentajeVacias2021 8.358e-02 1.234e-02 6.771 1.27e-11 ***
## PorcentajeSecundarias2021 4.906e-02 8.625e-03 5.688 1.29e-08 ***
## PorcentajeEspañola2021 2.950e-02 1.201e-02 2.457 0.0140 *
## PorcentajeProtegidas2021 -9.434e+00 3.883e+00 -2.430 0.0151 *
## ProtegidaDif -6.475e+00 1.616e+00 -4.005 6.19e-05 ***
## PrecioAlquilerM2 7.639e-02 6.327e-02 1.207 0.2273
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1382.4 on 999 degrees of freedom
## Residual deviance: 1062.2 on 990 degrees of freedom
## AIC: 1082.2
##
## Number of Fisher Scoring iterations: 5
# Matriz de confusión
prob_pred <- predict(modelo, type = "response")
clases_pred <- ifelse(prob_pred > 0.5, 1, 0)
confusion_matrix <-table(Real = numeric_data$Propiedad, Predicho = clases_pred)
confusion_matrix
## Predicho
## Real 0 1
## 0 417 114
## 1 173 296
# Accuracy
sum(diag(confusion_matrix)) / sum(confusion_matrix)
## [1] 0.713
# Sensibilidad
confusion_matrix[2, 2] / sum(confusion_matrix[2, ])
## [1] 0.6311301
# Especificidad
confusion_matrix[1, 1] / sum(confusion_matrix[1, ])
## [1] 0.7853107
# Calcular las medias para cada régimen en 2011 y 2021
media_2011 <- propiedad %>%
summarise(
Property11 = mean(Propiedad11Mun, na.rm = TRUE),
Rent11 = mean(Alquiler11Mun, na.rm = TRUE),
Other11 = mean(Otro11Mun, na.rm = TRUE))
media_2021 <- propiedad %>%
summarise(
Property21 = mean(Propiedad21Mun, na.rm = TRUE),
Rent21 = mean(Alquiler21Mun, na.rm = TRUE),
Other21 = mean(Otro21Mun, na.rm = TRUE)
)
# Combinar los datos en un solo dataframe
medias <- tibble(
Year = c(2011, 2021),
Property = c(media_2011$Property11, media_2021$Property21),
Rent = c(media_2011$Rent11, media_2021$Rent21),
Other = c(media_2011$Other11, media_2021$Other21)
)
# Transformar los datos a formato largo y modificar el orden de "Regime"
medias_long <- medias %>%
pivot_longer(cols = c(Property, Rent, Other),
names_to = "Regime",
values_to = "Value") %>%
mutate(Regime = factor(Regime, levels = c("Property", "Rent", "Other"))) # Modificar el orden de la variable "Regime" para la leyenda
evo <- ggplot(medias_long, aes(x = Year, y = Value, color = Regime, group = Regime)) +
geom_line(size = 0.7) +
geom_point(size = 2, stroke = 0.5) + # Ajustar el ancho de la línea de los puntos
geom_text(aes(label = paste0(round(Value, 1), "%")), vjust = -0.8, family = "Calibri", size = 4.2, color = "black") + # Agregar el signo de porcentaje y cambiar el color a negro
scale_color_manual(values=c("Property"="#ff7767", "Rent"="#19CF8B", "Other" = "#FFA500")) + # Cambiar el color de "Otro"
labs(title="Evolution of tenure regimes in Spain in 2011-2021 (targeted population)", # Eliminar el título
caption= "Source: Self-elaboration from Census data", # Eliminar la leyenda
x=NULL,
y=NULL,
color=NULL) +
scale_y_continuous(labels = scales::percent_format(scale = 1)) +
scale_x_continuous(breaks = c(2011, 2021), limits = c(2011, 2021)) + # Establecer los límites y las marcas del eje x
theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5, vjust = 6.5, size = 16, family = "Times New Roman", color = "black", face = "bold"),
plot.caption = element_text(hjust = -0.1, vjust=-5, family = "Times New Roman", color="#555555", size = 12), # Cambiar la fuente de la leyenda
axis.title = element_blank(),
axis.text.x = element_text(color="#333333", size = 12, family = "Times New Roman"), # Ajustar el tamaño de la fuente del texto del eje x
axis.text.y = element_text(color="#333333", size = 12, family = "Times New Roman"), # Ajustar el tamaño de la fuente del texto del eje y
axis.line.x = element_line(size = 0.5, color = "black"),
axis.line.y = element_line(size = 0.5, color = "black"),
legend.text = element_text(margin = margin(t = 0, r = 0.5, b = 0, l = 0), color="#333333", family = "Calibri", size = 12),
panel.background = element_rect(fill = "white", color = NA), # Remove border
panel.grid.major = element_line(color = "grey80", size = 0.5),
panel.grid.minor = element_blank(),
panel.border = element_blank(), # Remove panel border
plot.margin = unit(c(1, 0.5, 1.2, 1), "cm")) +
guides(color = guide_legend(override.aes = list(shape = "square", size = 3, linetype = "blank")))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
ggsave("evolucion.png", plot = evo, width = 8, height = 5.7, bg = "white")
municipios <- municipios |>
mutate(MUN_LITERAL = ifelse(grepl("\\d", MUN_LITERAL), paste(MUN_LITERAL, PROV_LITERAL), MUN_LITERAL))
names = municipios$MUN_LITERAL
prov = municipios$PROV_LITERAL
ccaa = municipios$CCAA
library(ggplot2)
scatterplot <- ggplot(municipios, aes(x = PropiedadDifMun, y = MUN_LITERAL, color = CCAA)) +
geom_point(aes(text = paste(MUN_LITERAL, "PropiedadDifMun:", PropiedadDifMun))) +
geom_text(aes(label = MUN_LITERAL), hjust = 1.5, vjust = 0.5, size = 2.5) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
axis.text.y = element_blank(),
axis.ticks.y = element_blank()) +
labs(title = "Scatterplot Interactivo de Municipios",
x = "PropiedadDifMun",
y = "")
## Warning in geom_point(aes(text = paste(MUN_LITERAL, "PropiedadDifMun:", :
## Ignoring unknown aesthetics: text
# Convertirlo en un gráfico interactivo con ggplotly
ggplotly(scatterplot, tooltip = "text")